From d8a0229e7b1d836791fc929b189b99d2fbc728af Mon Sep 17 00:00:00 2001
From: manonBlanco <blanco@teklia.com>
Date: Wed, 19 Jul 2023 17:20:34 +0200
Subject: [PATCH] Update documentation

---
 dan/datasets/extract/__init__.py | 2 +-
 docs/usage/datasets/extract.md   | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/dan/datasets/extract/__init__.py b/dan/datasets/extract/__init__.py
index 232f4458..82b82a75 100644
--- a/dan/datasets/extract/__init__.py
+++ b/dan/datasets/extract/__init__.py
@@ -90,7 +90,7 @@ def add_extract_parser(subcommands) -> None:
     parser.add_argument(
         "--only-entities",
         action="store_true",
-        help="Extract text with their entities and remove all text that does not belong to the tokens.",
+        help="Remove all text that does not belong to the tokens.",
     )
     parser.add_argument(
         "--allow-unknown-entities",
diff --git a/docs/usage/datasets/extract.md b/docs/usage/datasets/extract.md
index edced49f..fb65fcbb 100644
--- a/docs/usage/datasets/extract.md
+++ b/docs/usage/datasets/extract.md
@@ -12,6 +12,7 @@ Use the `teklia-dan dataset extract` command to extract a dataset from an Arkind
 | `--parent-element-type` | Type of the parent element containing the data. | `str` | `page` |
 | `--output` | Folder where the data will be generated. | `Path` | |
 | `--load-entities` | Extract text with their entities. Needed for NER tasks. | `bool` | `False` |
+| `--only-entities` | Remove all text that does not belong to the tokens. | `bool` | `False` |
 | `--allow-unknown-entities` | Ignore entities that do not appear in the list of tokens. | `bool` | `False` |
 | `--tokens` | Mapping between starting tokens and end tokens. Needed for NER tasks. | `Path` | |
 | `--use-existing-split` | Use the specified folder IDs for the dataset split. | `bool` | |
-- 
GitLab