# test_db.py
# -*- coding: utf-8 -*-

import pytest

from dan.datasets.extract.db import (
    Element,
    Entity,
    Transcription,
    get_elements,
    get_transcription_entities,
    get_transcriptions,
)


def test_get_elements():
    """
    Check that the "page" elements retrieved under a known parent match verified results
    """
    # Verified identifiers, in the exact order the retrieval is expected to yield them
    expected_ids = [
        "0c8c62ef-3a2b-4b7b-a1bb-b0048864ee08",
        "e03c645b-017d-4946-b65c-491eeeb6888b",
        "e1676371-0d3e-44cc-a6e4-c4f33af878a9",
        "bc60dfc9-f180-48b0-873c-ba7629d4f6d8",
    ]

    elements = get_elements(
        element_type="page",
        parent_id="d2b9fe93-3198-42de-8c07-f4ab67990e21",
    )

    # Result count and type of every result
    assert len(elements) == 4
    for item in elements:
        assert isinstance(item, Element)

    # Identifiers must match one-to-one, order included
    retrieved_ids = [item.id for item in elements]
    assert retrieved_ids == expected_ids


@pytest.mark.parametrize(
    "worker_version",
    (False, "0b2a429a-0da2-4b79-a6bb-330c6a07ac60"),
)
def test_get_transcriptions(worker_version):
    """
    Check that the transcription retrieved for a known element matches verified results

    Runs once with a falsy worker version and once with a specific worker version id.
    NOTE(review): presumably `False` selects transcriptions without a worker version;
    confirm against `get_transcriptions`.
    """
    results = get_transcriptions(
        element_id="a3bf4b60-a149-49b4-80dd-5fbe27137efa",
        transcription_worker_version=worker_version,
    )

    # Exactly one transcription is expected
    assert len(results) == 1
    found = results.pop()
    assert isinstance(found, Transcription)

    # The text is the same whichever worker version is requested
    assert found.text == "[ T 8º SUP 26200"

    # The identifier depends on the requested worker version
    expected_id = (
        "3bd248d6-998a-4579-a00c-d4639f3825aa"
        if worker_version
        else "c551960a-0f82-4779-b975-77a457bcf273"
    )
    assert found.id == expected_id

@pytest.mark.parametrize(
    "worker_version",
    (False, "0e2a98f5-71ac-48f6-973b-cc10ed440965"),
)
def test_get_transcription_entities(worker_version):
    """
    Check that the entity retrieved for a known transcription matches verified results

    Runs once with a falsy worker version and once with a specific worker version id.
    """
    found_entities = get_transcription_entities(
        transcription_id="3bd248d6-998a-4579-a00c-d4639f3825aa",
        entity_worker_version=worker_version,
    )

    # Exactly one entity is expected
    assert len(found_entities) == 1
    entity = found_entities.pop()
    assert isinstance(entity, Entity)

    # Expected attribute values depend on the requested worker version
    if worker_version:
        expected = {
            "type": "cote",
            "value": "T 8 º SUP 26200",
            "offset": 2,
            "length": 14,
        }
    else:
        expected = {
            "type": "Cote",
            "value": "[ T 8º SUP 26200",
            "offset": 0,
            "length": 16,
        }

    # Attributes are checked in declaration order: type, value, offset, length
    for attribute, wanted in expected.items():
        assert getattr(entity, attribute) == wanted