Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
Backend
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Container Registry
Analyze
Contributor analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Arkindex
Backend
Commits
93e62014
Commit
93e62014
authored
3 years ago
by
Erwan Rouchet
Committed by
Bastien Abadie
3 years ago
Browse files
Options
Downloads
Patches
Plain Diff
Add trigger and test
parent
449d0891
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
arkindex/documents/tests/tasks/test_export.py
+246
-0
246 additions, 0 deletions
arkindex/documents/tests/tasks/test_export.py
arkindex/project/triggers.py
+12
-1
12 additions, 1 deletion
arkindex/project/triggers.py
with
258 additions
and
1 deletion
arkindex/documents/tests/tasks/test_export.py
0 → 100644
+
246
−
0
View file @
93e62014
import
json
import
os
import
sqlite3
from
arkindex.dataimport.models
import
WorkerVersion
from
arkindex.documents.export
import
export_corpus
from
arkindex.documents.models
import
(
Classification
,
ElementPath
,
EntityLink
,
EntityType
,
MetaData
,
Transcription
,
TranscriptionEntity
,
)
from
arkindex.images.models
import
Image
from
arkindex.project.tests
import
FixtureTestCase
class TestExport(FixtureTestCase):
    """Tests for the corpus SQLite export task."""

    def test_export(self):
        """
        Export a corpus populated with one of every exportable object type,
        then check that each table of the resulting SQLite database matches
        the contents of the Django ORM.
        """
        element = self.corpus.elements.get(name='Volume 1')
        transcription = Transcription.objects.first()
        version = WorkerVersion.objects.get(worker__slug='reco')

        # Create one of each optional object so that every export table
        # is exercised with non-trivial data.
        element.classifications.create(
            ml_class=self.corpus.ml_classes.create(name='Blah'),
            confidence=.55555555,
        )
        entity1 = self.corpus.entities.create(
            name='Arrokuda',
            type=EntityType.Location,
            metas={'subtype': 'pokemon'},
        )
        entity2 = self.corpus.entities.create(
            name='Stonjourner',
            type=EntityType.Person,
            validated=True,
            moderator=self.superuser,
        )
        role = self.corpus.roles.create(
            parent_name='parent',
            child_name='child',
            parent_type=EntityType.Location,
            child_type=EntityType.Person,
        )
        role.links.create(parent=entity1, child=entity2)
        transcription.transcription_entities.create(
            entity=entity1,
            offset=1,
            length=1,
            version=version,
        )

        db_path = export_corpus(self.corpus.id)
        db = sqlite3.connect(db_path)
        try:
            self.assertCountEqual(
                db.execute('SELECT id, url, width, height FROM image').fetchall(),
                [
                    (str(image.id), image.url, image.width, image.height)
                    for image in Image.objects.all()
                ]
            )

            self.assertCountEqual(
                db.execute('SELECT id, name, slug, type, revision FROM worker_version').fetchall(),
                [
                    (
                        str(version.id),
                        version.worker.name,
                        version.worker.slug,
                        version.worker.type,
                        version.revision.hash,
                    )
                ]
            )

            actual_rows = db.execute(
                "SELECT id, created, updated, name, type, worker_version_id, image_id, polygon FROM element"
            ).fetchall()
            for i, actual_row in enumerate(actual_rows):
                # Convert the row from a tuple to a list because we'll change it
                row = list(actual_row)
                if row[-1] is not None:
                    # Parse the polygons as JSON for easier comparison
                    row[-1] = json.loads(row[-1])
                actual_rows[i] = row

            expected_rows = []
            for element in self.corpus.elements.all():
                row = [
                    str(element.id),
                    element.created.timestamp(),
                    element.updated.timestamp(),
                    element.name,
                    element.type.slug,
                ]
                if element.worker_version_id:
                    row.append(str(element.worker_version_id))
                else:
                    row.append(None)
                if element.zone:
                    row.append(str(element.zone.image_id))
                    row.append([
                        # coords returns a list of tuples of floats, we turn it into a list of lists of ints
                        [int(x), int(y)]
                        for x, y in element.zone.polygon.coords
                    ])
                else:
                    row.extend([None, None])
                expected_rows.append(row)

            self.assertCountEqual(actual_rows, expected_rows)

            self.assertCountEqual(
                db.execute("SELECT id, parent_id, child_id, ordering FROM element_path").fetchall(),
                [
                    (str(path_id), str(parent_id), str(child_id), ordering)
                    # values_list maps (id, path__last, element_id, ordering)
                    # onto the exported (id, parent_id, child_id, ordering) columns.
                    # `path_id` avoids shadowing the builtin `id`.
                    for path_id, parent_id, child_id, ordering
                    in ElementPath.objects
                    .filter(element__corpus=self.corpus)
                    .values_list('id', 'path__last', 'element_id', 'ordering')
                ]
            )

            self.assertCountEqual(
                db.execute("SELECT id, element_id, text, confidence, worker_version_id FROM transcription").fetchall(),
                [
                    (
                        str(transcription.id),
                        str(transcription.element_id),
                        transcription.text,
                        transcription.confidence,
                        str(transcription.worker_version_id) if transcription.worker_version_id else None,
                    )
                    for transcription in Transcription.objects.filter(element__corpus=self.corpus)
                ]
            )

            self.assertCountEqual(
                db.execute(
                    "SELECT id, element_id, class_name, state, moderator, confidence, high_confidence, worker_version_id FROM classification"
                ).fetchall(),
                [
                    (
                        str(classification.id),
                        str(classification.element_id),
                        classification.ml_class.name,
                        classification.state.value,
                        classification.moderator.email if classification.moderator else None,
                        classification.confidence,
                        # SQLite has no boolean type; booleans are exported as integers
                        int(classification.high_confidence),
                        str(classification.worker_version_id) if classification.worker_version_id else None,
                    )
                    for classification in Classification.objects.filter(element__corpus=self.corpus)
                ]
            )

            self.assertCountEqual(
                db.execute("SELECT id, element_id, name, type, value, entity_id, worker_version_id FROM metadata").fetchall(),
                [
                    (
                        str(metadata.id),
                        str(metadata.element_id),
                        metadata.name,
                        metadata.type.value,
                        metadata.value,
                        str(metadata.entity_id) if metadata.entity_id else None,
                        str(metadata.worker_version_id) if metadata.worker_version_id else None,
                    )
                    for metadata in MetaData.objects.filter(element__corpus=self.corpus)
                ]
            )

            self.assertCountEqual(
                db.execute("SELECT id, name, type, validated, moderator, metas, worker_version_id FROM entity").fetchall(),
                [
                    (
                        str(entity.id),
                        entity.name,
                        entity.type.value,
                        int(entity.validated),
                        entity.moderator.email if entity.moderator else None,
                        # Entity metas are exported as a JSON string, or NULL when unset
                        json.dumps(entity.metas) if entity.metas else None,
                        str(entity.worker_version_id) if entity.worker_version_id else None,
                    )
                    for entity in self.corpus.entities.all()
                ]
            )

            self.assertCountEqual(
                db.execute("SELECT id, parent_name, child_name, parent_type, child_type FROM entity_role").fetchall(),
                [
                    (
                        str(role.id),
                        role.parent_name,
                        role.child_name,
                        role.parent_type.value,
                        role.child_type.value,
                    )
                    for role in self.corpus.roles.all()
                ]
            )

            self.assertCountEqual(
                db.execute("SELECT id, parent_id, child_id, role_id FROM entity_link").fetchall(),
                [
                    (
                        str(link.id),
                        str(link.parent_id),
                        str(link.child_id),
                        str(link.role_id),
                    )
                    for link in EntityLink.objects.filter(role__corpus=self.corpus)
                ]
            )

            self.assertCountEqual(
                db.execute(
                    "SELECT id, transcription_id, entity_id, offset, length, worker_version_id FROM transcription_entity"
                ).fetchall(),
                [
                    (
                        str(transcription_entity.id),
                        str(transcription_entity.transcription_id),
                        str(transcription_entity.entity_id),
                        transcription_entity.offset,
                        transcription_entity.length,
                        str(transcription_entity.worker_version_id) if transcription_entity.worker_version_id else None,
                    )
                    for transcription_entity in TranscriptionEntity.objects.filter(entity__corpus=self.corpus)
                ]
            )
        finally:
            # Always close the connection and remove the temporary database,
            # even when an assertion fails, so test runs do not leak open
            # handles or files on disk.
            db.close()
            os.unlink(db_path)
This diff is collapsed.
Click to expand it.
arkindex/project/triggers.py
+
12
−
1
View file @
93e62014
...
...
@@ -7,7 +7,7 @@ from uuid import UUID
from
django.conf
import
settings
from
arkindex.dataimport.models
import
DataImport
,
WorkerVersion
from
arkindex.documents
import
tasks
from
arkindex.documents
import
export
,
tasks
from
arkindex.documents.managers
import
ElementQuerySet
from
arkindex.documents.models
import
Corpus
,
Element
,
Entity
...
...
@@ -142,3 +142,14 @@ def initialize_activity(process: DataImport):
Initialize activity on every process elements for worker versions that are part of its workflow
"""
tasks
.
initialize_activity
.
delay
(
process
)
def export_corpus(corpus: Corpus, user_id: Optional[int] = None) -> None:
    """
    Queue an asynchronous export of a corpus to a SQLite database.

    :param corpus: Corpus to be exported.
    :param user_id: Optional ID of the user requesting the export.
    """
    description = f'Export of corpus {corpus.name}'
    export.export_corpus.delay(
        corpus_id=corpus.id,
        user_id=user_id,
        description=description,
    )
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment