# -*- coding: utf-8 -*-
"""
Data extraction
"""
from pathlib import Path
from typing import Optional

from teklia_toolbox.config import ConfigParser

from atr_data_generator.arguments import CommonArgs
from atr_data_generator.extract.arguments import (
    DEFAULT_RESCALE,
    ExtractionMode,
    FilterArgs,
    ImageArgs,
    SelectArgs,
)
from atr_data_generator.extract.base import DataGenerator
from atr_data_generator.extract.pylaia.arguments import PylaiaArgs
from atr_data_generator.extract.pylaia.main import PylaiaDataGenerator
from atr_data_generator.extract.utils import ListedEnum
class Generators(ListedEnum):
    """
    List of supported specific formatters.

    Members are looked up by name (``Generators[format]``) and their value is
    the generator class that gets instantiated for that format.
    """

    # Generator class used when the "pylaia" format is selected.
    pylaia = PylaiaDataGenerator


Yoann Schneider's avatar
Yoann Schneider committed
def _float(value):
    if value is None:
        return None
    return float(value)


def get_parser():
    """
    Build the configuration parser describing the extraction settings.

    The parser declares the following sections:
    - ``common``: dataset name, output/cache directories, parameter logging
    - ``image``: extraction mode, resizing, deskew/rotation and grayscale
      options, plus a nested ``scale`` section
    - ``filter``: accepted worker versions and vertical line skipping
    - ``select``: dataset and element type to extract
    - ``pylaia``: Pylaia-specific options

    Sections created with ``default={}`` (``image``, ``scale``, ``filter``,
    ``pylaia``) fall back to an empty mapping; presumably this makes the whole
    section optional in the configuration file (to confirm against
    ``ConfigParser``).

    Returns the configured ``ConfigParser``.
    """
    parser = ConfigParser()

    # Common arguments
    common = parser.add_subparser("common")
    common.add_option("dataset_name", type=str)
    common.add_option("output_dir", type=Path)
    common.add_option("cache_dir", type=Path, default=Path(".cache"))
    common.add_option("log_parameters", type=bool, default=True)

    # Image arguments
    image = parser.add_subparser("image", default={})
    image.add_option(
        "extraction_mode",
        type=ExtractionMode,
        default=ExtractionMode.deskew_min_area_rect,
    )
    image.add_option("fixed_height", type=int, default=None)
    image.add_option("max_deskew_angle", type=int, default=45)
    image.add_option("skew_angle", type=int, default=0)
    image.add_option("should_rotate", type=bool, default=False)
    image.add_option("grayscale", type=bool, default=True)

    # Scale factors, nested in the image section; _float keeps None as None
    scale = image.add_subparser("scale", default={})
    scale.add_option("x", type=_float, default=DEFAULT_RESCALE)
    scale.add_option("y_top", type=_float, default=DEFAULT_RESCALE)
    scale.add_option("y_bottom", type=_float, default=DEFAULT_RESCALE)

    # Filters
    filters = parser.add_subparser("filter", default={})
    filters.add_option("accepted_worker_version_ids", type=str, many=True, default=[])
    filters.add_option("skip_vertical_lines", type=bool, default=False)

    # Select
    select = parser.add_subparser("select")
    select.add_option("dataset", type=str)
    select.add_option("element_type", type=str, default=None)

    # Format specific
    # Pylaia
    pylaia = parser.add_subparser("pylaia", default={})
    pylaia.add_option("syms_path", type=Path, default=None)

    return parser


def config_parser(configuration_path: Path) -> dict:
    """
    Parse the configuration file and build the argument objects.

    :param configuration_path: Path to the configuration file, parsed with
        the parser returned by ``get_parser``.
    :return: A dict holding the parsed
        - CommonArgs under ``"common"``
        - ImageArgs under ``"image"``
        - FilterArgs under ``"filter"``
        - SelectArgs under ``"select"``
        - PylaiaArgs under ``"pylaia"`` (format specific)
    """
    config_data = get_parser().parse(configuration_path)
    # Each section of the parsed configuration is expanded as keyword
    # arguments of the matching argument class.
    return {
        "common": CommonArgs(**config_data["common"]),
        "image": ImageArgs(**config_data["image"]),
        "filter": FilterArgs(**config_data["filter"]),
        "select": SelectArgs(**config_data["select"]),
        # Format specific
        "pylaia": PylaiaArgs(**config_data["pylaia"]),
    }


def add_extract_subparser(subcommands):
    """
    Register the ``extract`` sub-command and its command line arguments.

    :param subcommands: Object exposing ``add_parser``; presumably the
        sub-parsers action returned by ``ArgumentParser.add_subparsers``
        (to confirm against the caller).
    """
    # The module docstring doubles as the description and help text.
    parser = subcommands.add_parser(
        "extract",
        description=__doc__,
        help=__doc__,
    )
    parser.add_argument("--config", type=Path, help="Configuration file")
    parser.add_argument("--database-path", type=Path, help="Export path")
    parser.add_argument(
        "--format", type=str, choices=Generators.list(), help="Format of the dataset."
    )
    # Attach the entry point and the configuration loader to the parsed
    # namespace so the caller can retrieve them for this sub-command.
    parser.set_defaults(func=main, config_parser=config_parser)


def main(
    database_path: Path,
    format: Optional[str],
    **kwargs,
):
    """
    Run the data extraction with the generator matching ``format``.

    :param database_path: Passed to the generator's ``run`` as ``db_path``.
    :param format: Name of a ``Generators`` member, or ``None`` to use the
        base ``DataGenerator``.
    :param kwargs: Forwarded as-is to the generator constructor.
    """
    if format is None:
        # No specific format requested: use the generic generator.
        generator_cls = DataGenerator
    else:
        generator_cls = Generators[format].value

    generator = generator_cls(**kwargs)
    generator.run(db_path=database_path)