Skip to content
Snippets Groups Projects
Commit e14367f7 authored by Solene Tarride, committed by Mélodie Boillet
Browse files

Fix dataset extraction offset

parent 5641f4c8
No related branches found
No related tags found
1 merge request: !42 Fix dataset extraction offset
......@@ -22,6 +22,7 @@ def save_json(path, dict):
def insert_token(text, count, start_token, end_token, offset, length):
"""
Insert the given tokens at the right position in the text
start_token or end_token can be empty strings
"""
text = (
# Text before entity
......@@ -35,7 +36,9 @@ def insert_token(text, count, start_token, end_token, offset, length):
# Text after entity
+ text[count + 1 + offset + length :]
)
return text, count + 2
token_offset = len(start_token) + len(end_token)
return text, count + token_offset
def parse_tokens(filename):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment