diff --git a/arkindex_worker/reporting.py b/arkindex_worker/reporting.py index 80259de398f4089a3616ab268689441dd0034023..e69b283f21c28887deb011a18a3297be28c9ec57 100644 --- a/arkindex_worker/reporting.py +++ b/arkindex_worker/reporting.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- import json import traceback -import warnings from collections import Counter from datetime import datetime @@ -83,35 +82,12 @@ class Reporter(object): ) element["classifications"] = dict(counter) - def add_transcription(self, element_id, type=None, type_count=None): + def add_transcription(self, element_id, count=1): """ Report creating a transcription on an element. Multiple transcriptions with the same parent can be declared with the type_count parameter. """ - if type_count is None: - if isinstance(type, int): - type_count, type = type, None - else: - type_count = 1 - - if type is not None: - warnings.warn( - "Transcription types have been deprecated and will be removed in the next release.", - FutureWarning, - ) - - self._get_element(element_id)["transcriptions"] += type_count - - def add_transcriptions(self, element_id, transcriptions): - """ - Report one or more transcriptions at once. - """ - assert isinstance(transcriptions, list), "A list is required for transcriptions" - warnings.warn( - "Reporter.add_transcriptions is deprecated due to transcription types being removed. Please use Reporter.add_transcription(element_id, count) instead.", - FutureWarning, - ) - self.add_transcription(element_id, len(transcriptions)) + self._get_element(element_id)["transcriptions"] += count def add_entity(self, element_id, entity_id, type, name): """ diff --git a/arkindex_worker/worker.py b/arkindex_worker/worker.py index 541cc2869d05bf9b208c7daa3c202342d25c3142..00a3f5507f5340ef784d5be52d714e2413928a81 100644 --- a/arkindex_worker/worker.py +++ b/arkindex_worker/worker.py @@ -6,7 +6,6 @@ import os import sqlite3 import sys import uuid -import warnings from enum import Enum from pathlib import Path @@ -183,14 +182,6 @@ class BaseWorker(object): """Override this method to implement your own process""" -class TranscriptionType(Enum): - Page = "page" - Paragraph = "paragraph" - Line = "line" - Word = "word" - Character = "character" - - class EntityType(Enum): Person = "person" Location = "location" @@ -488,13 +479,9 @@ class ElementsWorker(BaseWorker): return created_ids - def create_transcription(self, element, text, type=None, score=None): + def create_transcription(self, element, text, score): """ Create a transcription on the given element through the API. - - Transcription types are deprecated; please call this method using - `create_transcription(element, text, score)` instead of - `create_transcription(element, text, type, score)`. """ assert element and isinstance( element, Element @@ -503,17 +490,6 @@ class ElementsWorker(BaseWorker): text, str ), "text shouldn't be null and should be of type str" - # When score is not set and type is not a transcription type, grab the score from `type`. - # Allows create_transcription(element, text, score) and (element, text, type, score) - # for forwards compatibility after transcription types get deleted. - if score is None and not isinstance(type, TranscriptionType): - score, type = type, None - elif isinstance(type, TranscriptionType): - warnings.warn( - "Transcription types are deprecated and will be removed in the next release.", - FutureWarning, - ) - assert ( isinstance(score, float) and 0 <= score <= 1 ), "score shouldn't be null and should be a float in [0..1] range" @@ -629,15 +605,9 @@ class ElementsWorker(BaseWorker): return entity["id"] - def create_element_transcriptions( - self, element, sub_element_type, transcription_type=None, transcriptions=None - ): + def create_element_transcriptions(self, element, sub_element_type, transcriptions): """ Create multiple sub elements with their transcriptions on the given element through API - - Transcription types are deprecated; please call this method using - `create_element_transcriptions(element, sub_element_type, transcriptions)` instead of - `create_element_transcriptions(element, sub_element_type, transcription_type, transcriptions)`. """ assert element and isinstance( element, Element @@ -645,20 +615,6 @@ class ElementsWorker(BaseWorker): assert sub_element_type and isinstance( sub_element_type, str ), "sub_element_type shouldn't be null and should be of type str" - - # When transcriptions are not set and transcription_type is not a valid transcription type, - # take transcriptions from `transcription_type`. - # Allows for forwards compatibility after transcription types get deleted. - if transcriptions is None and not isinstance( - transcription_type, TranscriptionType - ): - transcriptions, transcription_type = transcription_type, None - elif isinstance(transcription_type, TranscriptionType): - warnings.warn( - "Transcription types are deprecated and will be removed in the next release.", - FutureWarning, - ) - assert transcriptions and isinstance( transcriptions, list ), "transcriptions shouldn't be null and should be of type list" diff --git a/tests/test_elements_worker/test_transcriptions.py b/tests/test_elements_worker/test_transcriptions.py index 466ee2ae84fc356df6e8abf743a6e8953bd2130b..3d603fcb7f6e548f6398ee9330a47e75194ff2a7 100644 --- a/tests/test_elements_worker/test_transcriptions.py +++ b/tests/test_elements_worker/test_transcriptions.py @@ -5,7 +5,6 @@ import pytest from apistar.exceptions import ErrorResponse from arkindex_worker.models import Element -from arkindex_worker.worker import TranscriptionType TRANSCRIPTIONS_SAMPLE = [ { @@ -44,35 +43,6 @@ def test_create_transcription_wrong_element(mock_elements_worker): assert str(e.value) == "element shouldn't be null and should be of type Element" -def test_create_transcription_type_warning(responses, mock_elements_worker): - elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) - responses.add( - responses.POST, - f"http://testserver/api/v1/element/{elt.id}/transcription/", - status=200, - ) - - with pytest.warns(FutureWarning) as w: - mock_elements_worker.create_transcription( - element=elt, - text="i am a line", - type=TranscriptionType.Word, - score=0.42, - ) - assert len(w) == 1 - assert ( - w[0].message.args[0] - == "Transcription types are deprecated and will be removed in the next release." - ) - - assert len(responses.calls) == 3 - assert [call.request.url for call in responses.calls] == [ - "http://testserver/api/v1/user/", - "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/", - f"http://testserver/api/v1/element/{elt.id}/transcription/", - ] - - def test_create_transcription_wrong_text(mock_elements_worker): elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) @@ -80,7 +50,6 @@ def test_create_transcription_wrong_text(mock_elements_worker): mock_elements_worker.create_transcription( element=elt, text=None, - type=TranscriptionType.Line, score=0.42, ) assert str(e.value) == "text shouldn't be null and should be of type str" @@ -89,7 +58,6 @@ def test_create_transcription_wrong_text(mock_elements_worker): mock_elements_worker.create_transcription( element=elt, text=1234, - type=TranscriptionType.Line, score=0.42, ) assert str(e.value) == "text shouldn't be null and should be of type str" @@ -232,35 +200,6 @@ def test_create_element_transcriptions_wrong_sub_element_type(mock_elements_work ) -def test_create_element_transcriptions_transcription_type_warning( - responses, mock_elements_worker -): - elt = Element({"id": "12341234-1234-1234-1234-123412341234"}) - responses.add( - responses.POST, - f"http://testserver/api/v1/element/{elt.id}/transcriptions/bulk/", - status=200, - json=[ - {"id": "word1_1_1", "created": False}, - {"id": "word1_1_2", "created": False}, - {"id": "word1_1_3", "created": False}, - ], - ) - - with pytest.warns(FutureWarning) as w: - mock_elements_worker.create_element_transcriptions( - element=elt, - sub_element_type="page", - transcription_type=TranscriptionType.Word, - transcriptions=TRANSCRIPTIONS_SAMPLE, - ) - assert len(w) == 1 - assert ( - w[0].message.args[0] - == "Transcription types are deprecated and will be removed in the next release." - ) - - def test_create_element_transcriptions_wrong_transcriptions(mock_elements_worker): elt = Element({"zone": None}) diff --git a/tests/test_reporting.py b/tests/test_reporting.py index af19dd1e6d374aaa7caee5afba893f33451492a2..3332702e1962c98132bb74effa54a813a7195438 100644 --- a/tests/test_reporting.py +++ b/tests/test_reporting.py @@ -126,36 +126,12 @@ def test_add_transcription(): } -def test_add_transcription_warning(): - reporter = Reporter("worker") - - with pytest.warns(FutureWarning) as w: - reporter.add_transcription("myelement", "word") - assert len(w) == 1 - assert ( - w[0].message.args[0] - == "Transcription types have been deprecated and will be removed in the next release." - ) - - assert "myelement" in reporter.report_data["elements"] - element_data = reporter.report_data["elements"]["myelement"] - del element_data["started"] - assert element_data == { - "elements": {}, - "transcriptions": 1, - "classifications": {}, - "entities": [], - "metadata": [], - "errors": [], - } - - def test_add_transcription_count(): """ Report multiple transcriptions with the same element and type """ reporter = Reporter("worker") - reporter.add_transcription("myelement", type_count=1337) + reporter.add_transcription("myelement", 1337) assert "myelement" in reporter.report_data["elements"] element_data = reporter.report_data["elements"]["myelement"] del element_data["started"] @@ -169,39 +145,6 @@ def test_add_transcription_count(): } -def test_add_transcriptions(): - reporter = Reporter("worker") - with pytest.raises(AssertionError): - reporter.add_transcriptions("myelement", {"not": "a list"}) - - with pytest.warns(FutureWarning) as w: - reporter.add_transcriptions("myelement", [{"type": "word"}, {"type": "line"}]) - reporter.add_transcriptions( - "myelement", - [ - {"type": "word"}, - {"type": "line", "text": "something"}, - {"type": "word", "confidence": 0.42}, - ], - ) - assert len(w) == 2 - assert set(warning.message.args[0] for warning in w) == { - "Reporter.add_transcriptions is deprecated due to transcription types being removed. Please use Reporter.add_transcription(element_id, count) instead." - } - - assert "myelement" in reporter.report_data["elements"] - element_data = reporter.report_data["elements"]["myelement"] - del element_data["started"] - assert element_data == { - "elements": {}, - "transcriptions": 5, - "classifications": {}, - "entities": [], - "metadata": [], - "errors": [], - } - - def test_add_entity(): reporter = Reporter("worker") reporter.add_entity(