Skip to content
Snippets Groups Projects

Filter entities by name when extracting data from Arkindex

Merged Manon Blanco requested to merge allow-unknown-entities into main
All threads resolved!
5 files
+ 17
54
Compare changes
  • Side-by-side
  • Inline
Files
5
@@ -40,6 +40,15 @@ def validate_probability(proba):
return proba
def validate_char(char: str) -> str:
    """Validate that a command-line value is exactly one character long.

    Intended for use as an argparse ``type=`` callable.

    Args:
        char: The raw string value to check.

    Returns:
        ``char`` unchanged when its length is exactly 1.

    Raises:
        argparse.ArgumentTypeError: If ``char`` is empty or longer than
            one character.
    """
    if len(char) != 1:
        raise argparse.ArgumentTypeError(
            f"`{char}` (of length {len(char)}) is not a valid character. Must be a string of length 1."
        )
    return char
def add_extract_parser(subcommands) -> None:
parser = subcommands.add_parser(
"extract",
@@ -87,14 +96,9 @@ def add_extract_parser(subcommands) -> None:
action="store_true",
help="Extract text with their entities.",
)
parser.add_argument(
"--allow-unknown-entities",
action="store_true",
help="Ignore entities that do not appear in the list of tokens.",
)
parser.add_argument(
"--entity-separators",
type=str,
type=validate_char,
nargs="+",
help="Removes all text that does not appear in an entity or in the list of given characters. Do not give any arguments for keeping the whole text.",
required=False,
Loading