# -*- coding: utf-8 -*-
"""
Data extraction
"""
import uuid
from pathlib import Path

from teklia_toolbox.config import ConfigParser

from atr_data_generator.arguments import CommonArgs
from atr_data_generator.extract.arguments import (
    DEFAULT_RESCALE,
    ExtractionMode,
    FilterArgs,
    ImageArgs,
    SelectArgs,
)
from atr_data_generator.extract.base import main


def _float(value):
    if value is None:
        return None
    return float(value)


def get_parser():
    """
    Build the configuration parser used by ``config_parser``.

    The returned ``ConfigParser`` declares four sections:

    - ``common``: dataset name, output/cache directories, parameter logging
    - ``image``: extraction mode, resizing/deskew settings and a nested
      ``scale`` section (``x``, ``y_top``, ``y_bottom``)
    - ``filter``: accepted worker versions and vertical line skipping
    - ``select``: folders and parent/element types

    ``image``, ``scale``, ``filter`` and ``select`` are registered with
    ``default={}``.
    NOTE(review): presumably this lets them be omitted from the
    configuration file - confirm against teklia_toolbox.
    """
    parser = ConfigParser()

    # Common arguments
    common = parser.add_subparser("common")
    common.add_option("dataset_name", type=str)
    common.add_option("output_dir", type=Path)
    common.add_option("cache_dir", type=Path, default=Path(".cache"))
    common.add_option("log_parameters", type=bool, default=True)

    # Image arguments
    image = parser.add_subparser("image", default={})
    image.add_option(
        "extraction_mode",
        type=ExtractionMode,
        default=ExtractionMode.deskew_min_area_rect,
    )
    image.add_option("fixed_height", type=int, default=None)
    image.add_option("max_deskew_angle", type=int, default=45)
    image.add_option("skew_angle", type=int, default=0)
    image.add_option("should_rotate", type=bool, default=False)
    image.add_option("grayscale", type=bool, default=True)

    # Scale factors: parsed with _float so that None is passed through
    scale = image.add_subparser("scale", default={})
    scale.add_option("x", type=_float, default=DEFAULT_RESCALE)
    scale.add_option("y_top", type=_float, default=DEFAULT_RESCALE)
    scale.add_option("y_bottom", type=_float, default=DEFAULT_RESCALE)

    # Filters
    filters = parser.add_subparser("filter", default={})
    filters.add_option(
        "accepted_worker_version_ids", type=uuid.UUID, many=True, default=[]
    )
    filters.add_option("skip_vertical_lines", type=bool, default=False)

    # Select
    select = parser.add_subparser("select", default={})
    select.add_option("folders", type=uuid.UUID, many=True, default=[])
    select.add_option("parent_type", type=str, default=None)
    select.add_option("element_type", type=str, default=None)

    return parser


def config_parser(configuration_path: Path):
    """
    Parse the configuration file at ``configuration_path`` and wrap each
    section in its argument class.

    Returns a dict with the keys:
    - ``common``: CommonArgs
    - ``image``: ImageArgs
    - ``filters``: FilterArgs (built from the ``filter`` section)
    - ``select``: SelectArgs
    """
    parsed = get_parser().parse(configuration_path)
    return {
        "common": CommonArgs(**parsed["common"]),
        "image": ImageArgs(**parsed["image"]),
        # The section is named "filter" in the file but exposed as "filters"
        "filters": FilterArgs(**parsed["filter"]),
        "select": SelectArgs(**parsed["select"]),
    }


def add_extract_subparser(subcommands):
    """
    Register the ``extract`` sub-command on ``subcommands``.

    ``subcommands`` must provide ``add_parser`` (e.g. the object returned
    by ``argparse.ArgumentParser.add_subparsers``). The new parser accepts
    ``--config`` and ``--database-path`` (both converted to ``Path``) and
    stores ``main`` as ``func`` and ``config_parser`` as ``config_parser``
    in its defaults.
    """
    parser = subcommands.add_parser(
        "extract",
        description=__doc__,
        help=__doc__,
    )
    parser.add_argument("--config", type=Path, help="Configuration file")
    parser.add_argument("--database-path", type=Path, help="Export path")
    parser.set_defaults(func=main, config_parser=config_parser)