Skip to content
Snippets Groups Projects
Commit 64c5d8d8 authored by Yoann Schneider :tennis:
Browse files

Do not filter dataset elements

parent 158c7310
No related branches found
No related tags found
1 merge request: !89 Do not filter dataset elements
Pipeline #165561 passed
......@@ -69,8 +69,8 @@ def get_parser():
filters.add_option("skip_vertical_lines", type=bool, default=False)
# Select
select = parser.add_subparser("select", default={})
select.add_option("dataset", type=str, default=None)
select = parser.add_subparser("select")
select.add_option("dataset", type=str)
select.add_option("element_type", type=str, default=None)
# Format specific
......
......@@ -53,6 +53,9 @@ class SelectArgs(BaseArgs):
def __post_init__(self):
assert UUID(self.dataset)
# Configuration parser issue: https://gitlab.teklia.com/tools/python-toolbox/-/issues/2
if self.element_type == "None":
self.element_type = None
@dataclass
......
......@@ -181,9 +181,7 @@ class DataGenerator:
# Iterate over sets
for split in dataset.sets.split(","):
# Find the dataset elements
for parent in get_dataset_elements(
dataset, split, self.select.element_type
):
for parent in get_dataset_elements(dataset, split):
self.process_parent(parent.element, split)
assert sum(
......
......@@ -8,16 +8,15 @@ from arkindex_export.queries import list_children
from atr_data_generator.extract.arguments import MANUAL
def get_dataset_elements(dataset: Dataset, split: str, type: Optional[str]):
def get_dataset_elements(dataset: Dataset, split: str):
"""
Retrieve dataset elements in a specific split from an SQLite export of an Arkindex corpus
:param dataset: Dataset object from which the elements come.
:param split: Set name of the dataset to use.
:param type: Optionally filter by element type.
:return: The filtered list of dataset elements.
"""
query = (
return (
DatasetElement.select(DatasetElement.element)
.join(Element)
.where(
......@@ -25,10 +24,6 @@ def get_dataset_elements(dataset: Dataset, split: str, type: Optional[str]):
DatasetElement.set_name == split,
)
)
if type:
query = query.where(Element.type == type)
return query
def parse_sources(sources: List[str]):
......
......@@ -5,7 +5,7 @@ The YAML configuration for the `extract` subcommand has 5 sections:
- `common`,
- `image` (optional),
- `filter` (optional),
- `select` (optional).
- `select`.
An example configuration file, filled with the default values when there is one, is available at `examples/extraction.yml`.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment