Skip to content
Snippets Groups Projects

Integration of Worker Runs in extraction

Merged — Manon Blanco requested to merge `extract-worker-run` into `main`
8 files
+ 159
- 53
Compare changes
  • Side-by-side
  • Inline
Files
8
@@ -15,16 +15,16 @@ from dan.datasets.extract.arkindex import run
MANUAL_SOURCE = "manual"
def parse_worker_version(worker_version_id) -> str | bool:
if worker_version_id == MANUAL_SOURCE:
def parse_source(source) -> str | bool:
if source == MANUAL_SOURCE:
return False
try:
UUID(worker_version_id)
UUID(source)
except ValueError:
raise argparse.ArgumentTypeError(f"`{worker_version_id}` is not a valid UUID.")
raise argparse.ArgumentTypeError(f"`{source}` is not a valid UUID.")
return worker_version_id
return source
def validate_char(char):
@@ -97,18 +97,32 @@ def add_extract_parser(subcommands) -> None:
parser.add_argument(
"--transcription-worker-versions",
type=parse_worker_version,
type=parse_source,
nargs="+",
help=f"Filter transcriptions by worker_version. Use {MANUAL_SOURCE} for manual filtering.",
default=[],
)
parser.add_argument(
"--entity-worker-versions",
type=parse_worker_version,
type=parse_source,
nargs="+",
help=f"Filter transcriptions entities by worker_version. Use {MANUAL_SOURCE} for manual filtering.",
default=[],
)
parser.add_argument(
"--transcription-worker-runs",
type=parse_source,
nargs="+",
help=f"Filter transcriptions by worker_run. Use {MANUAL_SOURCE} for manual filtering.",
default=[],
)
parser.add_argument(
"--entity-worker-runs",
type=parse_source,
nargs="+",
help=f"Filter transcriptions entities by worker_run. Use {MANUAL_SOURCE} for manual filtering.",
default=[],
)
parser.add_argument(
"--subword-vocab-size",
Loading