Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
Backend
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Container Registry
Analyze
Contributor analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Arkindex
Backend
Commits
38a1cab7
Commit
38a1cab7
authored
6 years ago
by
Bastien Abadie
Committed by
Erwan Rouchet
6 years ago
Browse files
Options
Downloads
Patches
Plain Diff
Bulk create Transcription
parent
e42911d4
No related branches found
Branches containing commit
No related tags found
Tags containing commit
1 merge request
!22
Add score to transcriptions
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
arkindex/images/importer.py
+38
-17
38 additions, 17 deletions
arkindex/images/importer.py
with
38 additions
and
17 deletions
arkindex/images/importer.py
+
38
−
17
View file @
38a1cab7
...
...
@@ -104,35 +104,56 @@ def bulk_transcriptions(image, page, items):
# Calc needed TrBox to build
needed
=
required
.
difference
(
existing
)
if
not
needed
:
return
[]
zones
=
[]
transcriptions
=
[]
with
transaction
.
atomic
():
# Create transcriptions and linked zones
for
n
in
needed
:
tr
=
Transcription
.
objects
.
create
(
line
=
n
.
line
,
text
=
n
.
text
,
score
=
n
.
score
,
# Raw elements
elements
=
Element
.
objects
.
bulk_create
(
Element
(
type
=
ElementType
.
Transcription
)
for
_
in
needed
)
# Build transcriptions & zones instances at the same time
transcriptions
,
zones
=
zip
(
*
[
(
Transcription
(
element_ptr_id
=
elt
.
id
,
line
=
n
.
line
,
text
=
n
.
text
,
score
=
n
.
score
,
),
Zone
(
element_id
=
elt
.
id
,
image
=
image
,
polygon
=
n
.
box
.
to_polygon
(),
)
)
transcriptions
.
append
(
tr
)
for
elt
,
n
in
zip
(
elements
,
needed
)
])
# Create transcriptions using a low-level bulk_create
# as multi table is not supported yet by Django
Transcription
.
objects
.
none
().
_batched_insert
(
transcriptions
,
zones
.
append
(
Zone
(
element_id
=
tr
.
id
,
image
=
image
,
polygon
=
n
.
box
.
to_polygon
(),
))
# Here is the magic: we need only to insert the fields from documents_transcription
fields
=
set
(
Transcription
.
_meta
.
concrete_fields
).
difference
(
Element
.
_meta
.
concrete_fields
),
# Default
batch_size
=
None
,
)
#
Build
zones in bulk
#
Create
zones in bulk
Zone
.
objects
.
bulk_create
(
zones
)
# Create all links between transcription and page
max_order_dl
=
ElementLink
.
objects
.
filter
(
parent
=
page
).
order_by
(
'
-order
'
).
first
()
max_order
=
0
if
max_order_dl
is
None
else
max_order_dl
.
order
+
1
ElementLink
.
objects
.
bulk_create
(
ElementLink
(
parent
=
page
,
child
=
t
r
,
order
=
i
)
for
i
,
t
r
in
enumerate
(
transcription
s
,
max_order
)
ElementLink
(
parent
=
page
,
child
=
el
t
,
order
=
i
)
for
i
,
el
t
in
enumerate
(
element
s
,
max_order
)
)
return
transcriptions
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment