Skip to content
Snippets Groups Projects
Commit 93e62014 authored by Erwan Rouchet's avatar Erwan Rouchet Committed by Bastien Abadie
Browse files

Add trigger and test

parent 449d0891
No related branches found
No related tags found
No related merge requests found
import json
import os
import sqlite3
from arkindex.dataimport.models import WorkerVersion
from arkindex.documents.export import export_corpus
from arkindex.documents.models import (
Classification,
ElementPath,
EntityLink,
EntityType,
MetaData,
Transcription,
TranscriptionEntity,
)
from arkindex.images.models import Image
from arkindex.project.tests import FixtureTestCase
class TestExport(FixtureTestCase):
    """
    Test the SQLite corpus export by populating a corpus with one of each
    exportable object, exporting it, and comparing every table of the
    resulting database with the Django ORM contents.
    """

    def test_export(self):
        element = self.corpus.elements.get(name='Volume 1')
        transcription = Transcription.objects.first()
        version = WorkerVersion.objects.get(worker__slug='reco')

        # Create one of each exportable object that the fixtures do not already provide
        element.classifications.create(
            ml_class=self.corpus.ml_classes.create(name='Blah'),
            confidence=.55555555,
        )
        entity1 = self.corpus.entities.create(
            name='Arrokuda',
            type=EntityType.Location,
            metas={'subtype': 'pokemon'},
        )
        entity2 = self.corpus.entities.create(
            name='Stonjourner',
            type=EntityType.Person,
            validated=True,
            moderator=self.superuser,
        )
        role = self.corpus.roles.create(
            parent_name='parent',
            child_name='child',
            parent_type=EntityType.Location,
            child_type=EntityType.Person,
        )
        role.links.create(parent=entity1, child=entity2)
        transcription.transcription_entities.create(
            entity=entity1,
            offset=1,
            length=1,
            version=version,
        )

        db_path = export_corpus(self.corpus.id)
        # Register cleanups instead of unlinking at the end of the test, so the
        # exported file and the connection are released even when an assertion fails
        self.addCleanup(os.unlink, db_path)
        db = sqlite3.connect(db_path)
        self.addCleanup(db.close)

        self.assertCountEqual(
            db.execute('SELECT id, url, width, height FROM image').fetchall(),
            [
                (
                    str(image.id),
                    image.url,
                    image.width,
                    image.height
                )
                for image in Image.objects.all()
            ]
        )

        self.assertCountEqual(
            db.execute('SELECT id, name, slug, type, revision FROM worker_version').fetchall(),
            [
                (
                    str(version.id),
                    version.worker.name,
                    version.worker.slug,
                    version.worker.type,
                    version.revision.hash
                )
            ]
        )

        # Convert each row to a list and parse the polygons as JSON for easier comparison
        actual_rows = [
            [*row[:-1], json.loads(row[-1])] if row[-1] is not None else list(row)
            for row in db.execute(
                "SELECT id, created, updated, name, type, worker_version_id, image_id, polygon FROM element"
            ).fetchall()
        ]

        expected_rows = []
        for element in self.corpus.elements.all():
            row = [
                str(element.id),
                element.created.timestamp(),
                element.updated.timestamp(),
                element.name,
                element.type.slug,
                str(element.worker_version_id) if element.worker_version_id else None,
            ]
            if element.zone:
                row.append(str(element.zone.image_id))
                row.append([
                    # coords returns a list of tuples of floats, we turn it into a list of lists of ints
                    [int(x), int(y)] for x, y in element.zone.polygon.coords
                ])
            else:
                row.extend([None, None])
            expected_rows.append(row)

        self.assertCountEqual(actual_rows, expected_rows)

        self.assertCountEqual(
            db.execute("SELECT id, parent_id, child_id, ordering FROM element_path").fetchall(),
            [
                (
                    str(id),
                    str(parent_id),
                    str(child_id),
                    ordering
                )
                for id, parent_id, child_id, ordering
                in ElementPath.objects
                .filter(element__corpus=self.corpus)
                .values_list('id', 'path__last', 'element_id', 'ordering')
            ]
        )

        self.assertCountEqual(
            db.execute("SELECT id, element_id, text, confidence, worker_version_id FROM transcription").fetchall(),
            [
                (
                    str(transcription.id),
                    str(transcription.element_id),
                    transcription.text,
                    transcription.confidence,
                    str(transcription.worker_version_id) if transcription.worker_version_id else None
                )
                for transcription in Transcription.objects.filter(element__corpus=self.corpus)
            ]
        )

        self.assertCountEqual(
            db.execute("SELECT id, element_id, class_name, state, moderator, confidence, high_confidence, worker_version_id FROM classification").fetchall(),
            [
                (
                    str(classification.id),
                    str(classification.element_id),
                    classification.ml_class.name,
                    classification.state.value,
                    classification.moderator.email if classification.moderator else None,
                    classification.confidence,
                    # SQLite stores booleans as integers
                    int(classification.high_confidence),
                    str(classification.worker_version_id) if classification.worker_version_id else None
                )
                for classification in Classification.objects.filter(element__corpus=self.corpus)
            ]
        )

        self.assertCountEqual(
            db.execute("SELECT id, element_id, name, type, value, entity_id, worker_version_id FROM metadata").fetchall(),
            [
                (
                    str(metadata.id),
                    str(metadata.element_id),
                    metadata.name,
                    metadata.type.value,
                    metadata.value,
                    str(metadata.entity_id) if metadata.entity_id else None,
                    str(metadata.worker_version_id) if metadata.worker_version_id else None
                )
                for metadata in MetaData.objects.filter(element__corpus=self.corpus)
            ]
        )

        self.assertCountEqual(
            db.execute("SELECT id, name, type, validated, moderator, metas, worker_version_id FROM entity").fetchall(),
            [
                (
                    str(entity.id),
                    entity.name,
                    entity.type.value,
                    int(entity.validated),
                    entity.moderator.email if entity.moderator else None,
                    json.dumps(entity.metas) if entity.metas else None,
                    str(entity.worker_version_id) if entity.worker_version_id else None,
                )
                for entity in self.corpus.entities.all()
            ]
        )

        self.assertCountEqual(
            db.execute("SELECT id, parent_name, child_name, parent_type, child_type FROM entity_role").fetchall(),
            [
                (
                    str(role.id),
                    role.parent_name,
                    role.child_name,
                    role.parent_type.value,
                    role.child_type.value,
                )
                for role in self.corpus.roles.all()
            ]
        )

        self.assertCountEqual(
            db.execute("SELECT id, parent_id, child_id, role_id FROM entity_link").fetchall(),
            [
                (
                    str(link.id),
                    str(link.parent_id),
                    str(link.child_id),
                    str(link.role_id)
                )
                for link in EntityLink.objects.filter(role__corpus=self.corpus)
            ]
        )

        self.assertCountEqual(
            db.execute("SELECT id, transcription_id, entity_id, offset, length, worker_version_id FROM transcription_entity").fetchall(),
            [
                (
                    str(transcription_entity.id),
                    str(transcription_entity.transcription_id),
                    str(transcription_entity.entity_id),
                    transcription_entity.offset,
                    transcription_entity.length,
                    str(transcription_entity.worker_version_id) if transcription_entity.worker_version_id else None
                )
                for transcription_entity in TranscriptionEntity.objects.filter(entity__corpus=self.corpus)
            ]
        )
......@@ -7,7 +7,7 @@ from uuid import UUID
from django.conf import settings
from arkindex.dataimport.models import DataImport, WorkerVersion
from arkindex.documents import tasks
from arkindex.documents import export, tasks
from arkindex.documents.managers import ElementQuerySet
from arkindex.documents.models import Corpus, Element, Entity
......@@ -142,3 +142,14 @@ def initialize_activity(process: DataImport):
Initialize activity on every process elements for worker versions that are part of its workflow
"""
tasks.initialize_activity.delay(process)
def export_corpus(corpus: Corpus, user_id: Optional[int] = None) -> None:
    """
    Queue an asynchronous task exporting a corpus to a SQLite database.

    :param corpus: Corpus to export.
    :param user_id: Optional ID of the user that requested the export.
    """
    task_kwargs = {
        'corpus_id': corpus.id,
        'user_id': user_id,
        'description': f'Export of corpus {corpus.name}',
    }
    export.export_corpus.delay(**task_kwargs)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment