diff --git a/arkindex/dataimport/tasks.py b/arkindex/dataimport/tasks.py index 9d4a21a2336d8e910a3996eb0698315c7163e489..8a6257d256b212427232918e4c94f88d7c32ada4 100644 --- a/arkindex/dataimport/tasks.py +++ b/arkindex/dataimport/tasks.py @@ -214,10 +214,6 @@ def save_ml_results(self, results, **kwargs): if result is None: continue - page.classification = result['classification'] - page.save() - self.report_message("Updated ML results for {}".format(page)) - tr_items = result['zones'] # Parse transcription types for item in tr_items: @@ -235,6 +231,11 @@ def save_ml_results(self, results, **kwargs): index_transcriptions(transcriptions) self.report_message('Indexed transcriptions for {}'.format(page)) + page.build_text() + page.classification = result['classification'] + page.save() + self.report_message("Updated ML results for {}".format(page)) + return list(map(str, results.keys())) diff --git a/arkindex/dataimport/tests/test_tasks.py b/arkindex/dataimport/tests/test_tasks.py index f32dbcc703ee2344b66cef5a1e3b4d7155c92a18..60927626f345ce9d2f3c4947904e52709fee4aa0 100644 --- a/arkindex/dataimport/tests/test_tasks.py +++ b/arkindex/dataimport/tests/test_tasks.py @@ -72,6 +72,7 @@ class TestTasks(FixtureMixin, RedisMockAPITestCase): 'label': 'dog', 'probability': 0.9, }]) + self.assertEqual(dog.text, '') dog_ts = dog.transcriptions.get() self.assertEqual(dog_ts.type, TranscriptionType.Word) self.assertEqual(dog_ts.text, 'woof') @@ -83,6 +84,7 @@ class TestTasks(FixtureMixin, RedisMockAPITestCase): 'label': 'cat', 'probability': 0.8, }]) + self.assertEqual(cat.text, 'meow') cat_ts = cat.transcriptions.get() self.assertEqual(cat_ts.type, TranscriptionType.Line) self.assertEqual(cat_ts.text, 'meow') diff --git a/arkindex/documents/models.py b/arkindex/documents/models.py index 48937a7061a029c4a781e5398f7ba17fc5261733..895c2828095ce33ec691193d56f76a62d878b09b 100644 --- a/arkindex/documents/models.py +++ b/arkindex/documents/models.py @@ -437,6 +437,15 @@ class 
Page(Element):
         # Wait for result
         self.text = task.get()
 
+    def build_text(self):  # Rebuild self.text from Line transcriptions; does not save
+        self.text = '\n'.join(
+            t.text
+            for t in sorted(  # ordered by zone polygon center: y first, then x
+                self.transcriptions.filter(type=TranscriptionType.Line).select_related('zone'),
+                key=lambda t: (t.zone.polygon.center.y, t.zone.polygon.center.x),
+            )
+        )
+
 
 class Act(Element):
     """
diff --git a/arkindex/documents/tests/test_page.py b/arkindex/documents/tests/test_page.py
new file mode 100644
index 0000000000000000000000000000000000000000..ebb16a2767bf2a5039a0693784e2a3b1b9e5a265
--- /dev/null
+++ b/arkindex/documents/tests/test_page.py
@@ -0,0 +1,42 @@
+from arkindex.project.tests import FixtureTestCase
+from arkindex.project.polygon import Polygon
+from arkindex.documents.models import Page, TranscriptionType
+
+
+class TestPage(FixtureTestCase):
+
+    def test_build_text(self):
+        img = self.imgsrv.images.create(path='build_text_test')
+        page = Page.objects.create(
+            corpus=self.corpus,
+            name='Page text test',
+            zone=img.zones.create(
+                polygon=Polygon.from_coords(0, 0, 1000, 1000),
+            ),
+        )
+        # Two lines next to each other
+        page.transcriptions.create(
+            type=TranscriptionType.Line,
+            zone=img.zones.create(polygon=Polygon.from_coords(0, 0, 500, 100)),
+            text='Ligne 1',
+        )
+        page.transcriptions.create(
+            type=TranscriptionType.Line,
+            zone=img.zones.create(polygon=Polygon.from_coords(600, 0, 100, 100)),
+            text='Ligne 2',
+        )
+        # One line under them
+        page.transcriptions.create(
+            type=TranscriptionType.Line,
+            zone=img.zones.create(polygon=Polygon.from_coords(0, 200, 42, 42)),
+            text='Ligne 3',
+        )
+        # A word that should not appear
+        page.transcriptions.create(
+            type=TranscriptionType.Word,
+            zone=img.zones.create(polygon=Polygon.from_coords(0, 0, 42, 42)),
+            text='Mot',
+        )
+
+        page.build_text()
+        self.assertEqual(page.text, "Ligne 1\nLigne 2\nLigne 3")