Skip to content
Snippets Groups Projects
test_transcriptions.py 64.1 KiB
Newer Older
# -*- coding: utf-8 -*-
import json
from uuid import UUID

import pytest
from apistar.exceptions import ErrorResponse
from playhouse.shortcuts import model_to_dict
from arkindex_worker.cache import CachedElement, CachedTranscription
from arkindex_worker.models import Element
from arkindex_worker.worker.transcription import TextOrientation
# Sample payload passed as the `transcriptions` argument of
# create_element_transcriptions in the tests of this module.
# Each entry carries a polygon (list of [x, y] points), a confidence and a text.
TRANSCRIPTIONS_SAMPLE = [
    {
        "polygon": [[100, 150], [700, 150], [700, 200], [100, 200]],
        "confidence": 0.5,
        "text": "The",
    },
    {
        "polygon": [[0, 0], [2000, 0], [2000, 3000], [0, 3000]],
        "confidence": 0.75,
        "text": "first",
    },
    {
        "polygon": [[1000, 300], [1200, 300], [1200, 500], [1000, 500]],
        "confidence": 0.9,
        "text": "line",
    },
]


def test_create_transcription_wrong_element(mock_elements_worker):
    """create_transcription must reject a null or wrongly typed element."""
    for bad_element in (None, "not element type"):
        with pytest.raises(AssertionError) as e:
            mock_elements_worker.create_transcription(
                element=bad_element,
                text="i am a line",
                # Valid confidence, so that only the element can be rejected
                confidence=0.42,
            )
        assert (
            str(e.value)
            == "element shouldn't be null and should be an Element or CachedElement"
        )


def test_create_transcription_wrong_text(mock_elements_worker):
    """create_transcription must reject a null or non-string text."""
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})

    for bad_text in (None, 1234):
        with pytest.raises(AssertionError) as e:
            mock_elements_worker.create_transcription(
                element=elt,
                text=bad_text,
                # Valid confidence, so that only the text can be rejected
                confidence=0.42,
            )
        assert str(e.value) == "text shouldn't be null and should be of type str"


def test_create_transcription_wrong_confidence(mock_elements_worker):
    """create_transcription must reject a confidence that is not a float in [0..1]."""
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})

    # Null, wrong type, below the range, above the range.
    # NOTE(review): -0.5 is an assumed below-range value — confirm it against
    # the validation performed by create_transcription.
    for bad_confidence in (None, "wrong confidence", -0.5, 2.00):
        with pytest.raises(AssertionError) as e:
            mock_elements_worker.create_transcription(
                element=elt,
                text="i am a line",
                confidence=bad_confidence,
            )
        assert (
            str(e.value)
            == "confidence shouldn't be null and should be a float in [0..1] range"
        )


def test_create_transcription_default_orientation(responses, mock_elements_worker):
    """Without an explicit orientation, the request uses "horizontal-lr"."""
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
    responses.add(
        responses.POST,
        f"http://testserver/api/v1/element/{elt.id}/transcription/",
        status=200,
        json={
            "id": "56785678-5678-5678-5678-567856785678",
            "text": "Animula vagula blandula",
            "confidence": 0.42,
            "worker_version_id": "12341234-1234-1234-1234-123412341234",
        },
    )
    mock_elements_worker.create_transcription(
        element=elt,
        text="Animula vagula blandula",
        # The confidence asserted in the request body below comes from this argument
        confidence=0.42,
    )
    assert json.loads(responses.calls[-1].request.body) == {
        "text": "Animula vagula blandula",
        "worker_version": "12341234-1234-1234-1234-123412341234",
        "confidence": 0.42,
        "orientation": "horizontal-lr",
    }


def test_create_transcription_orientation(responses, mock_elements_worker):
    """An explicit orientation is serialized to its string value in the request."""
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
    responses.add(
        responses.POST,
        f"http://testserver/api/v1/element/{elt.id}/transcription/",
        status=200,
        json={
            "id": "56785678-5678-5678-5678-567856785678",
            "text": "Animula vagula blandula",
            "confidence": 0.42,
            "worker_version_id": "12341234-1234-1234-1234-123412341234",
        },
    )
    mock_elements_worker.create_transcription(
        element=elt,
        text="Animula vagula blandula",
        # The confidence asserted in the request body below comes from this argument
        confidence=0.42,
        orientation=TextOrientation.VerticalLeftToRight,
    )
    assert json.loads(responses.calls[-1].request.body) == {
        "text": "Animula vagula blandula",
        "worker_version": "12341234-1234-1234-1234-123412341234",
        "confidence": 0.42,
        "orientation": "vertical-lr",
    }


def test_create_transcription_wrong_orientation(mock_elements_worker):
    """create_transcription must reject an orientation that is not a TextOrientation."""
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
    with pytest.raises(AssertionError) as e:
        mock_elements_worker.create_transcription(
            element=elt,
            text="Animula vagula blandula",
            # Valid confidence, so that only the orientation can be rejected
            confidence=0.42,
            orientation="eliptical",
        )
    assert (
        str(e.value)
        == "orientation shouldn't be null and should be of type TextOrientation"
    )


def test_create_transcription_api_error(responses, mock_elements_worker):
    """A 500 answer is retried, then surfaces as an ErrorResponse."""
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
    responses.add(
        responses.POST,
        f"http://testserver/api/v1/element/{elt.id}/transcription/",
        status=500,
    )

    with pytest.raises(ErrorResponse):
        mock_elements_worker.create_transcription(
            element=elt,
            text="i am a line",
            confidence=0.42,
        )

    assert len(responses.calls) == len(BASE_API_CALLS) + 5
    assert [
        (call.request.method, call.request.url) for call in responses.calls
    ] == BASE_API_CALLS + [
        # We retry 5 times the API call
        ("POST", f"http://testserver/api/v1/element/{elt.id}/transcription/"),
        ("POST", f"http://testserver/api/v1/element/{elt.id}/transcription/"),
        ("POST", f"http://testserver/api/v1/element/{elt.id}/transcription/"),
        ("POST", f"http://testserver/api/v1/element/{elt.id}/transcription/"),
        ("POST", f"http://testserver/api/v1/element/{elt.id}/transcription/"),
    ]


def test_create_transcription(responses, mock_elements_worker):
    """A valid transcription triggers exactly one POST with the expected body."""
    elt = Element({"id": "12341234-1234-1234-1234-123412341234"})
    responses.add(
        responses.POST,
        f"http://testserver/api/v1/element/{elt.id}/transcription/",
        status=200,
        json={
            "id": "56785678-5678-5678-5678-567856785678",
            "text": "i am a line",
            "confidence": 0.42,
            "worker_version_id": "12341234-1234-1234-1234-123412341234",
        },
    )

    mock_elements_worker.create_transcription(
        element=elt,
        text="i am a line",
        # The confidence asserted in the request body below comes from this argument
        confidence=0.42,
    )

    assert len(responses.calls) == len(BASE_API_CALLS) + 1
    assert [
        (call.request.method, call.request.url) for call in responses.calls
    ] == BASE_API_CALLS + [
        ("POST", f"http://testserver/api/v1/element/{elt.id}/transcription/"),
    ]

    assert json.loads(responses.calls[-1].request.body) == {
        "text": "i am a line",
        "worker_version": "12341234-1234-1234-1234-123412341234",
        "confidence": 0.42,
        "orientation": "horizontal-lr",
    }


def test_create_transcription_with_cache(responses, mock_elements_worker_with_cache):
    """A created transcription is sent to the API and stored in the SQLite cache."""
    elt = CachedElement.create(id="12341234-1234-1234-1234-123412341234", type="thing")

    responses.add(
        responses.POST,
        f"http://testserver/api/v1/element/{elt.id}/transcription/",
        status=200,
        json={
            "id": "56785678-5678-5678-5678-567856785678",
            "text": "i am a line",
            "confidence": 0.42,
            "orientation": "horizontal-lr",
            "worker_version_id": "12341234-1234-1234-1234-123412341234",
        },
    )

    mock_elements_worker_with_cache.create_transcription(
        element=elt,
        text="i am a line",
        # The confidence asserted in the request body below comes from this argument
        confidence=0.42,
    )

    assert len(responses.calls) == len(BASE_API_CALLS) + 1
    assert [
        (call.request.method, call.request.url) for call in responses.calls
    ] == BASE_API_CALLS + [
        ("POST", f"http://testserver/api/v1/element/{elt.id}/transcription/"),
    ]

    assert json.loads(responses.calls[-1].request.body) == {
        "text": "i am a line",
        "worker_version": "12341234-1234-1234-1234-123412341234",
        "orientation": "horizontal-lr",
        "confidence": 0.42,
    }

    # Check that created transcription was properly stored in SQLite cache
    assert list(CachedTranscription.select()) == [
        CachedTranscription(
            id=UUID("56785678-5678-5678-5678-567856785678"),
            element_id=UUID(elt.id),
            text="i am a line",
            confidence=0.42,
            orientation=TextOrientation.HorizontalLeftToRight,
            worker_version_id=UUID("12341234-1234-1234-1234-123412341234"),
        )
    ]


def test_create_transcription_orientation_with_cache(
    responses, mock_elements_worker_with_cache
):
    """An explicit orientation is sent to the API and stored in the SQLite cache."""
    elt = CachedElement.create(id="12341234-1234-1234-1234-123412341234", type="thing")
    responses.add(
        responses.POST,
        f"http://testserver/api/v1/element/{elt.id}/transcription/",
        status=200,
        json={
            "id": "56785678-5678-5678-5678-567856785678",
            "text": "Animula vagula blandula",
            "confidence": 0.42,
            "orientation": "vertical-lr",
            "worker_version_id": "12341234-1234-1234-1234-123412341234",
        },
    )
    mock_elements_worker_with_cache.create_transcription(
        element=elt,
        text="Animula vagula blandula",
        # The confidence asserted in the request body below comes from this argument
        confidence=0.42,
        orientation=TextOrientation.VerticalLeftToRight,
    )
    assert json.loads(responses.calls[-1].request.body) == {
        "text": "Animula vagula blandula",
        "worker_version": "12341234-1234-1234-1234-123412341234",
        "orientation": "vertical-lr",
        "confidence": 0.42,
    }
    # Check that the text orientation was properly stored in SQLite cache
    assert list(map(model_to_dict, CachedTranscription.select())) == [
        {
            "id": UUID("56785678-5678-5678-5678-567856785678"),
            "element": {
                "id": UUID("12341234-1234-1234-1234-123412341234"),
                "parent_id": None,
                "type": "thing",
                "image": None,
                "polygon": None,
                "rotation_angle": 0,
                "mirrored": False,
                "initial": False,
                "worker_version_id": None,
            },
            "text": "Animula vagula blandula",
            "confidence": 0.42,
            "orientation": TextOrientation.VerticalLeftToRight.value,
            "worker_version_id": UUID("12341234-1234-1234-1234-123412341234"),
        }
    ]


def test_create_transcriptions_wrong_transcriptions(mock_elements_worker):
    """create_transcriptions must reject a malformed payload or a malformed entry."""
    # The payload itself must be a list
    for bad_payload in (None, 1234):
        with pytest.raises(AssertionError) as e:
            mock_elements_worker.create_transcriptions(
                transcriptions=bad_payload,
            )
        assert (
            str(e.value)
            == "transcriptions shouldn't be null and should be of type list"
        )

    element_id = "11111111-1111-1111-1111-111111111111"
    element_id_error = "element_id shouldn't be null and should be of type str"
    text_error = "text shouldn't be null and should be of type str"
    confidence_error = (
        "confidence shouldn't be null and should be a float in [0..1] range"
    )
    orientation_error = (
        "orientation shouldn't be null and should be of type TextOrientation"
    )

    # Each case pairs an invalid entry, placed at index 1 after a valid one,
    # with the error expected for it.
    cases = [
        # element_id: missing, null, wrong type
        ({"text": "word", "confidence": 0.5}, element_id_error),
        ({"element_id": None, "text": "word", "confidence": 0.5}, element_id_error),
        ({"element_id": 1234, "text": "word", "confidence": 0.5}, element_id_error),
        # text: missing, null, wrong type
        ({"element_id": element_id}, text_error),
        ({"element_id": element_id, "text": None}, text_error),
        ({"element_id": element_id, "text": 1234}, text_error),
        # confidence: missing, null, wrong type, out of range
        ({"element_id": element_id, "text": "word"}, confidence_error),
        (
            {"element_id": element_id, "text": "word", "confidence": None},
            confidence_error,
        ),
        (
            {
                "element_id": element_id,
                "text": "word",
                "confidence": "a wrong confidence",
            },
            confidence_error,
        ),
        # NOTE(review): -0.5 is an assumed below-range value — confirm it
        # against the validation performed by create_transcriptions.
        (
            {"element_id": element_id, "text": "word", "confidence": -0.5},
            confidence_error,
        ),
        (
            {"element_id": element_id, "text": "word", "confidence": 2.00},
            confidence_error,
        ),
        # orientation: not a TextOrientation
        (
            {
                "element_id": element_id,
                "text": "word",
                "confidence": 0.28,
                "orientation": "wobble",
            },
            orientation_error,
        ),
    ]

    for bad_entry, expected_error in cases:
        # Fresh valid entry on every iteration, so cases cannot affect each other
        valid_entry = {"element_id": element_id, "text": "The", "confidence": 0.75}
        with pytest.raises(AssertionError) as e:
            mock_elements_worker.create_transcriptions(
                transcriptions=[valid_entry, bad_entry],
            )
        assert (
            str(e.value)
            == f"Transcription at index 1 in transcriptions: {expected_error}"
        )


def test_create_transcriptions_api_error(responses, mock_elements_worker):
    """A 500 answer on the bulk endpoint is retried, then raises ErrorResponse."""
    bulk_url = "http://testserver/api/v1/transcription/bulk/"
    responses.add(responses.POST, bulk_url, status=500)

    payload = [
        {
            "element_id": "11111111-1111-1111-1111-111111111111",
            "text": text,
            "confidence": confidence,
        }
        for text, confidence in (("The", 0.75), ("word", 0.42))
    ]

    with pytest.raises(ErrorResponse):
        mock_elements_worker.create_transcriptions(transcriptions=payload)

    performed = [(call.request.method, call.request.url) for call in responses.calls]
    # We retry 5 times the API call
    retried = [("POST", bulk_url)] * 5

    assert len(responses.calls) == len(BASE_API_CALLS) + 5
    assert performed == BASE_API_CALLS + retried


def test_create_transcriptions(responses, mock_elements_worker_with_cache):
    """Bulk creation sends one POST and stores every transcription in the cache."""
    CachedElement.create(id="11111111-1111-1111-1111-111111111111", type="thing")
    trans = [
        {
            "element_id": "11111111-1111-1111-1111-111111111111",
            "text": "The",
            "confidence": 0.75,
        },
        {
            "element_id": "11111111-1111-1111-1111-111111111111",
            "text": "word",
            "confidence": 0.42,
        },
    ]

    responses.add(
        responses.POST,
        "http://testserver/api/v1/transcription/bulk/",
        status=200,
        json={
            "worker_version": "12341234-1234-1234-1234-123412341234",
            "transcriptions": [
                {
                    "id": "00000000-0000-0000-0000-000000000000",
                    "element_id": "11111111-1111-1111-1111-111111111111",
                    "text": "The",
                    "orientation": "horizontal-lr",
                    "confidence": 0.75,
                },
                {
                    "id": "11111111-1111-1111-1111-111111111111",
                    "element_id": "11111111-1111-1111-1111-111111111111",
                    "text": "word",
                    "orientation": "horizontal-lr",
                    "confidence": 0.42,
                },
            ],
        },
    )

    mock_elements_worker_with_cache.create_transcriptions(
        transcriptions=trans,
    )

    assert len(responses.calls) == len(BASE_API_CALLS) + 1
    assert [
        (call.request.method, call.request.url) for call in responses.calls
    ] == BASE_API_CALLS + [
        ("POST", "http://testserver/api/v1/transcription/bulk/"),
    ]

    assert json.loads(responses.calls[-1].request.body) == {
        "worker_version": "12341234-1234-1234-1234-123412341234",
        "transcriptions": [
            {
                "element_id": "11111111-1111-1111-1111-111111111111",
                "text": "The",
                "confidence": 0.75,
                "orientation": TextOrientation.HorizontalLeftToRight.value,
            },
            {
                "element_id": "11111111-1111-1111-1111-111111111111",
                "text": "word",
                "confidence": 0.42,
                "orientation": TextOrientation.HorizontalLeftToRight.value,
            },
        ],
    }

    # Check that created transcriptions were properly stored in SQLite cache.
    # Text and confidence mirror the entries of the mocked API response above.
    assert list(CachedTranscription.select()) == [
        CachedTranscription(
            id=UUID("00000000-0000-0000-0000-000000000000"),
            element_id=UUID("11111111-1111-1111-1111-111111111111"),
            text="The",
            confidence=0.75,
            orientation=TextOrientation.HorizontalLeftToRight,
            worker_version_id=UUID("12341234-1234-1234-1234-123412341234"),
        ),
        CachedTranscription(
            id=UUID("11111111-1111-1111-1111-111111111111"),
            element_id=UUID("11111111-1111-1111-1111-111111111111"),
            text="word",
            confidence=0.42,
            orientation=TextOrientation.HorizontalLeftToRight,
            worker_version_id=UUID("12341234-1234-1234-1234-123412341234"),
        ),
    ]


def test_create_transcriptions_orientation(responses, mock_elements_worker_with_cache):
    """Bulk-created transcriptions keep their orientation in the request and the cache."""
    element_id = "11111111-1111-1111-1111-111111111111"
    worker_version_id = "12341234-1234-1234-1234-123412341234"
    first_id = "00000000-0000-0000-0000-000000000000"
    second_id = "11111111-1111-1111-1111-111111111111"
    bulk_url = "http://testserver/api/v1/transcription/bulk/"

    CachedElement.create(id=element_id, type="thing")

    # Mocked answer of the bulk endpoint, one entry per submitted transcription
    mocked_answer = {
        "worker_version": worker_version_id,
        "transcriptions": [
            {
                "id": first_id,
                "element_id": element_id,
                "text": "Animula vagula blandula",
                "orientation": "horizontal-rl",
                "confidence": 0.12,
            },
            {
                "id": second_id,
                "element_id": element_id,
                "text": "Hospes comesque corporis",
                "orientation": "vertical-lr",
                "confidence": 0.21,
            },
        ],
    }
    responses.add(responses.POST, bulk_url, status=200, json=mocked_answer)

    submitted = [
        {
            "element_id": element_id,
            "text": "Animula vagula blandula",
            "confidence": 0.12,
            "orientation": TextOrientation.HorizontalRightToLeft,
        },
        {
            "element_id": element_id,
            "text": "Hospes comesque corporis",
            "confidence": 0.21,
            "orientation": TextOrientation.VerticalLeftToRight,
        },
    ]
    mock_elements_worker_with_cache.create_transcriptions(
        transcriptions=submitted,
    )

    # Orientations must be serialized to their string value in the request body
    sent_body = json.loads(responses.calls[-1].request.body)
    assert sent_body == {
        "worker_version": worker_version_id,
        "transcriptions": [
            {
                "element_id": element_id,
                "text": "Animula vagula blandula",
                "confidence": 0.12,
                "orientation": TextOrientation.HorizontalRightToLeft.value,
            },
            {
                "element_id": element_id,
                "text": "Hospes comesque corporis",
                "confidence": 0.21,
                "orientation": TextOrientation.VerticalLeftToRight.value,
            },
        ],
    }

    # Check that oriented transcriptions were properly stored in SQLite cache
    cached_element = {
        "id": UUID(element_id),
        "parent_id": None,
        "type": "thing",
        "image": None,
        "polygon": None,
        "rotation_angle": 0,
        "mirrored": False,
        "initial": False,
        "worker_version_id": None,
    }
    stored = [model_to_dict(row) for row in CachedTranscription.select()]
    assert stored == [
        {
            "id": UUID(first_id),
            "element": cached_element,
            "text": "Animula vagula blandula",
            "confidence": 0.12,
            "orientation": TextOrientation.HorizontalRightToLeft.value,
            "worker_version_id": UUID(worker_version_id),
        },
        {
            "id": UUID(second_id),
            "element": cached_element,
            "text": "Hospes comesque corporis",
            "confidence": 0.21,
            "orientation": TextOrientation.VerticalLeftToRight.value,
            "worker_version_id": UUID(worker_version_id),
        },
    ]


def test_create_element_transcriptions_wrong_element(mock_elements_worker):
    """create_element_transcriptions must reject a null or wrongly typed element."""
    expected_error = (
        "element shouldn't be null and should be an Element or CachedElement"
    )
    for bad_element in (None, "not element type"):
        with pytest.raises(AssertionError) as e:
            mock_elements_worker.create_element_transcriptions(
                element=bad_element,
                sub_element_type="page",
                transcriptions=TRANSCRIPTIONS_SAMPLE,
            )
        assert str(e.value) == expected_error


def test_create_element_transcriptions_wrong_sub_element_type(mock_elements_worker):
    """create_element_transcriptions must reject a null or non-string sub_element_type."""
    parent = Element({"zone": None})
    expected_error = "sub_element_type shouldn't be null and should be of type str"

    for bad_type in (None, 1234):
        with pytest.raises(AssertionError) as e:
            mock_elements_worker.create_element_transcriptions(
                element=parent,
                sub_element_type=bad_type,
                transcriptions=TRANSCRIPTIONS_SAMPLE,
            )
        assert str(e.value) == expected_error


def test_create_element_transcriptions_wrong_transcriptions(mock_elements_worker):
    elt = Element({"zone": None})

    with pytest.raises(AssertionError) as e:
        mock_elements_worker.create_element_transcriptions(
            element=elt,
            sub_element_type="page",
            transcriptions=None,
        )
    assert str(e.value) == "transcriptions shouldn't be null and should be of type list"

    with pytest.raises(AssertionError) as e:
        mock_elements_worker.create_element_transcriptions(
            element=elt,
            sub_element_type="page",
            transcriptions=1234,
        )
    assert str(e.value) == "transcriptions shouldn't be null and should be of type list"

    with pytest.raises(AssertionError) as e:
        mock_elements_worker.create_element_transcriptions(
            element=elt,
            sub_element_type="page",
            transcriptions=[
                {
                    "polygon": [[0, 0], [2000, 0], [2000, 3000], [0, 3000]],
                    "confidence": 0.75,
                    "text": "The",
                },
                {
                    "polygon": [[100, 150], [700, 150], [700, 200], [100, 200]],
                },
            ],
        )
    assert (
        str(e.value)
        == "Transcription at index 1 in transcriptions: text shouldn't be null and should be of type str"
    )

    with pytest.raises(AssertionError) as e:
        mock_elements_worker.create_element_transcriptions(
            element=elt,
            sub_element_type="page",
            transcriptions=[
                {
                    "polygon": [[0, 0], [2000, 0], [2000, 3000], [0, 3000]],
                    "confidence": 0.75,
                    "text": "The",
                },
                {
                    "polygon": [[100, 150], [700, 150], [700, 200], [100, 200]],
                    "text": None,
                },
            ],
        )
    assert (
        str(e.value)
        == "Transcription at index 1 in transcriptions: text shouldn't be null and should be of type str"
    )

    with pytest.raises(AssertionError) as e:
        mock_elements_worker.create_element_transcriptions(
            element=elt,
            sub_element_type="page",
            transcriptions=[
                {
                    "polygon": [[0, 0], [2000, 0], [2000, 3000], [0, 3000]],
                    "confidence": 0.75,
                    "text": "The",
                },
                {
                    "polygon": [[100, 150], [700, 150], [700, 200], [100, 200]],
                    "text": 1234,
                },
            ],
        )
    assert (
        str(e.value)
        == "Transcription at index 1 in transcriptions: text shouldn't be null and should be of type str"
    )

    with pytest.raises(AssertionError) as e:
        mock_elements_worker.create_element_transcriptions(
            element=elt,
            sub_element_type="page",
            transcriptions=[
                {
                    "polygon": [[0, 0], [2000, 0], [2000, 3000], [0, 3000]],
                    "confidence": 0.75,
                    "text": "The",
                },
                {
                    "polygon": [[100, 150], [700, 150], [700, 200], [100, 200]],
                    "text": "word",
                },
            ],
        )
    assert (
        str(e.value)
        == "Transcription at index 1 in transcriptions: confidence shouldn't be null and should be a float in [0..1] range"
    )

    with pytest.raises(AssertionError) as e:
        mock_elements_worker.create_element_transcriptions(
            element=elt,
            sub_element_type="page",
            transcriptions=[
                {
                    "polygon": [[0, 0], [2000, 0], [2000, 3000], [0, 3000]],
                    "confidence": 0.75,