Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
D
Data Generator
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Operate
Terraform modules
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Automatic Text Recognition
Data Generator
Commits
681f7ff5
Commit
681f7ff5
authored
3 years ago
by
Martin Maarand
Browse files
Options
Downloads
Patches
Plain Diff
Don't filter vertical lines with rotation class
parent
9f61d8c8
No related branches found
No related tags found
1 merge request
!16
Don't filter vertical lines with rotation class
Pipeline
#74305
passed
3 years ago
Stage: test
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
kaldi_data_generator/main.py
+19
-14
19 additions, 14 deletions
kaldi_data_generator/main.py
kaldi_data_generator/utils.py
+9
-0
9 additions, 0 deletions
kaldi_data_generator/utils.py
with
28 additions
and
14 deletions
kaldi_data_generator/main.py
+
19
−
14
View file @
681f7ff5
...
@@ -169,8 +169,6 @@ class HTRDataGenerator:
...
@@ -169,8 +169,6 @@ class HTRDataGenerator:
raise
e
raise
e
def
get_transcriptions
(
self
,
page_id
:
str
,
accepted_zones
):
def
get_transcriptions
(
self
,
page_id
:
str
,
accepted_zones
):
count
=
0
count_skipped
=
0
lines
=
[]
lines
=
[]
try
:
try
:
for
res
in
self
.
api_client
.
paginate
(
for
res
in
self
.
api_client
.
paginate
(
...
@@ -210,14 +208,8 @@ class HTRDataGenerator:
...
@@ -210,14 +208,8 @@ class HTRDataGenerator:
polygon
=
polygon
,
polygon
=
polygon
,
text
=
text
,
text
=
text
,
)
)
if
self
.
skip_vertical_lines
:
rect
=
trans_data
.
rect
if
rect
.
height
>
rect
.
width
:
count_skipped
+=
1
continue
lines
.
append
(
trans_data
)
lines
.
append
(
trans_data
)
count
+=
1
if
self
.
should_rotate
:
if
self
.
should_rotate
:
classes_by_elem
=
self
.
get_children_classes
(
page_id
)
classes_by_elem
=
self
.
get_children_classes
(
page_id
)
...
@@ -237,7 +229,20 @@ class HTRDataGenerator:
...
@@ -237,7 +229,20 @@ class HTRDataGenerator:
else
:
else
:
logger
.
warning
(
f
"
No rotation classes on
{
trans
.
element_id
}
"
)
logger
.
warning
(
f
"
No rotation classes on
{
trans
.
element_id
}
"
)
return
(
lines
,
count
,
count_skipped
)
count_skipped
=
0
if
self
.
skip_vertical_lines
:
filtered_lines
=
[]
for
line
in
lines
:
if
line
.
is_vertical
:
count_skipped
+=
1
continue
filtered_lines
.
append
(
line
)
lines
=
filtered_lines
count
=
len
(
lines
)
return
lines
,
count
,
count_skipped
except
ErrorResponse
as
e
:
except
ErrorResponse
as
e
:
logger
.
info
(
logger
.
info
(
...
@@ -766,12 +771,12 @@ def main():
...
@@ -766,12 +771,12 @@ def main():
logger
.
info
(
logger
.
info
(
f
"
Number of skipped pages:
{
data_generator
.
skipped_pages_count
}
"
f
"
Number of skipped pages:
{
data_generator
.
skipped_pages_count
}
"
)
)
skipped_
ratio
=
data_generator
.
skipped_vertical_lines_count
/
(
_
skipped_
vertical_count
=
data_generator
.
skipped_vertical_lines_count
data_generator
.
skipped_vertical
_lines_count
_total_count
=
_skipped_vertical_count
+
data_generator
.
accepted
_lines_count
+
data_generator
.
accepted_lines_count
skipped_ratio
=
_skipped_vertical_count
/
_total_count
*
100
)
logger
.
info
(
logger
.
info
(
f
"
Skipped
{
data_generator
.
skipped_vertical_lines_count
}
vertical lines (
{
skipped_ratio
}
/1.0
)
"
f
"
Skipped
{
data_generator
.
skipped_vertical_lines_count
}
vertical lines (
{
round
(
skipped_ratio
,
2
)
}
%
)
"
)
)
else
:
else
:
logger
.
info
(
"
Creating a split from already downloaded files
"
)
logger
.
info
(
"
Creating a split from already downloaded files
"
)
...
...
This diff is collapsed.
Click to expand it.
kaldi_data_generator/utils.py
+
9
−
0
View file @
681f7ff5
...
@@ -25,6 +25,15 @@ class TranscriptionData:
...
@@ -25,6 +25,15 @@ class TranscriptionData:
self
.
rect
=
BoundingBox
.
_make
(
cv2
.
boundingRect
(
self
.
polygon
))
self
.
rect
=
BoundingBox
.
_make
(
cv2
.
boundingRect
(
self
.
polygon
))
@property
def
is_vertical
(
self
)
->
bool
:
"""
Used to filter out vertical lines. Will be ignored when rotation class is given.
"""
if
self
.
rotation_class
is
None
:
return
self
.
rect
.
height
>
self
.
rect
.
width
return
False
def
__repr__
(
self
):
def
__repr__
(
self
):
return
str
(
vars
(
self
))
return
str
(
vars
(
self
))
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment