Skip to content
Snippets Groups Projects

Remove code from original repo, small refactoring and packaging

Merged Yoann Schneider requested to merge remove-files-of-old-repo into main
54 files
+ 4011
4092
Compare changes
  • Side-by-side
  • Inline
Files
54
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
The utils module
======================
"""
import argparse
def get_cli_args(args=None):
    """
    Get the command-line arguments.

    :param args: Optional list of argument strings to parse. When ``None``
        (the default), argparse reads the process command line
        (``sys.argv[1:]``), so existing callers are unaffected.
    :return: The command-line arguments, as an ``argparse.Namespace`` with
        the attributes ``corpus``, ``element_type``, ``parents_types``,
        ``output_dir``, ``parents_names``, ``no_entities``,
        ``use_existing_split``, ``train_prob`` and ``val_prob``.
    :raises SystemExit: Raised by argparse when a required argument is
        missing or a value cannot be converted.
    """
    parser = argparse.ArgumentParser(
        description="Arkindex DAN Training Label Generation"
    )

    # Required arguments.
    parser.add_argument(
        "--corpus",
        type=str,
        help="Name of the corpus from which the data will be retrieved.",
        required=True,
    )
    parser.add_argument(
        "--element-type",
        nargs="+",
        type=str,
        help="Type of elements to retrieve",
        required=True,
    )
    parser.add_argument(
        "--parents-types",
        nargs="+",
        type=str,
        help="Type of parents of the elements.",
        required=True,
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        help="Path to the output directory.",
        required=True,
    )

    # Optional arguments.
    parser.add_argument(
        "--parents-names",
        nargs="+",
        type=str,
        help="Names of parents of the elements.",
        default=None,
    )
    parser.add_argument(
        "--no-entities",
        action="store_true",
        help="Extract text without entities",
    )
    parser.add_argument(
        "--use-existing-split",
        action="store_true",
        help="Do not partition pages into train/val/test",
    )
    parser.add_argument(
        "--train-prob",
        type=float,
        default=0.7,
        help="Training set probability",
    )
    parser.add_argument(
        "--val-prob",
        type=float,
        default=0.15,
        help="Validation set probability",
    )

    # Forwarding ``args`` keeps the default behaviour (parse sys.argv) while
    # letting callers and tests supply an explicit argument list.
    return parser.parse_args(args)
Loading