Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
D
Data Generator
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Operate
Terraform modules
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Automatic Text Recognition
Data Generator
Merge requests
!93
Skip lines where page could not be downloaded
Code
Review changes
Check out branch
Download
Patches
Plain diff
Merged
Skip lines where page could not be downloaded
skip-failed-pages
into
master
Overview
1
Commits
3
Pipelines
3
Changes
2
Merged
Yoann Schneider
requested to merge
skip-failed-pages
into
master
10 months ago
Overview
1
Commits
3
Pipelines
3
Changes
2
Expand
Closes
#43 (closed)
0
0
Merge request reports
Compare
version 1
version 2
2c286d00
10 months ago
version 1
4456cb0f
10 months ago
master (base)
and
version 2
latest version
c1486349
3 commits,
10 months ago
version 2
2c286d00
2 commits,
10 months ago
version 1
4456cb0f
1 commit,
10 months ago
Show latest version
2 files
+
68
−
26
Inline
Compare changes
Side-by-side
Inline
Show whitespace changes
Show one file at a time
Files
2
Search (e.g. *.vue) (Ctrl+P)
atr_data_generator/extract/base.py
+
15
−
12
Options
@@ -7,7 +7,7 @@ from pathlib import Path
from
typing
import
Any
,
Dict
import
numpy
as
np
from
arkindex_export
import
Dataset
,
Element
,
open_database
from
arkindex_export
import
Dataset
,
Element
,
Transcription
,
open_database
from
line_image_extractor.extractor
import
extract
,
read_img
,
save_img
from
line_image_extractor.image_utils
import
polygon_to_bbox
,
resize
from
PIL
import
Image
@@ -134,6 +134,19 @@ class DataGenerator:
def
parse_image_path
(
self
,
image_path
:
Path
):
return
str
(
image_path
)
def
process_child
(
self
,
child
:
Transcription
,
split
:
str
,
image_path
:
Path
):
try
:
# Extract the image
self
.
get_image
(
child
.
element
,
image_path
)
except
Exception
as
e
:
logger
.
warn
(
f
"
Skipping element (
{
child
.
id
}
):
{
e
}
"
)
return
# Store transcription
self
.
data
[
split
][
self
.
parse_image_path
(
image_path
)]
=
self
.
parse_transcription
(
child
.
text
,
split
=
split
)
def
process_parent
(
self
,
parent
:
Element
,
split
:
str
):
"""
Process every children under this parent element.
@@ -151,17 +164,7 @@ class DataGenerator:
/
"
images
"
/
f
"
{
parent
.
id
}
_
{
child
.
element
.
name
.
split
(
'
_
'
)[
-
1
]
}
_
{
child
.
element_id
}
.jpg
"
)
try
:
# Extract the image
self
.
get_image
(
child
.
element
,
image_path
)
except
Exception
as
e
:
logger
.
warn
(
f
"
Skipping element (
{
child
.
id
}
):
{
e
}
"
)
continue
# Store transcription
self
.
data
[
split
][
self
.
parse_image_path
(
image_path
)
]
=
self
.
parse_transcription
(
child
.
text
,
split
=
split
)
self
.
process_child
(
child
=
child
,
split
=
split
,
image_path
=
image_path
)
def
export
(
self
):
"""
Loading