Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
D
DAN
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Package Registry
Container Registry
Operate
Terraform modules
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Automatic Text Recognition
DAN
Commits
f57bc667
Commit
f57bc667
authored
1 year ago
by
Mélodie Boillet
Committed by
Yoann Schneider
1 year ago
Browse files
Options
Downloads
Patches
Plain Diff
Use a single padding method
parent
86163b14
No related branches found
Branches containing commit
No related tags found
1 merge request
!167
Use a single padding method
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
dan/manager/ocr.py
+2
-48
2 additions, 48 deletions
dan/manager/ocr.py
dan/utils.py
+17
-85
17 additions, 85 deletions
dan/utils.py
with
19 additions
and
133 deletions
dan/manager/ocr.py
+
2
−
48
View file @
f57bc667
...
@@ -6,10 +6,9 @@ import pickle
...
@@ -6,10 +6,9 @@ import pickle
import
cv2
import
cv2
import
numpy
as
np
import
numpy
as
np
import
torch
import
torch
from
torch
import
randint
from
dan.manager.dataset
import
DatasetManager
,
GenericDataset
,
apply_preprocessing
from
dan.manager.dataset
import
DatasetManager
,
GenericDataset
,
apply_preprocessing
from
dan.utils
import
pad_image
,
pad_images
,
pad_sequences_1D
,
token_to_ind
from
dan.utils
import
pad_images
,
pad_sequences_1D
,
token_to_ind
class
OCRDatasetManager
(
DatasetManager
):
class
OCRDatasetManager
(
DatasetManager
):
...
@@ -47,20 +46,6 @@ class OCRDatasetManager(DatasetManager):
...
@@ -47,20 +46,6 @@ class OCRDatasetManager(DatasetManager):
dataset
.
charset
=
self
.
charset
dataset
.
charset
=
self
.
charset
dataset
.
tokens
=
self
.
tokens
dataset
.
tokens
=
self
.
tokens
dataset
.
convert_labels
()
dataset
.
convert_labels
()
if
(
"
padding
"
in
dataset
.
params
[
"
config
"
]
and
dataset
.
params
[
"
config
"
][
"
padding
"
][
"
min_height
"
]
==
"
max
"
):
dataset
.
params
[
"
config
"
][
"
padding
"
][
"
min_height
"
]
=
max
(
[
s
[
"
img
"
].
shape
[
0
]
for
s
in
self
.
train_dataset
.
samples
]
)
if
(
"
padding
"
in
dataset
.
params
[
"
config
"
]
and
dataset
.
params
[
"
config
"
][
"
padding
"
][
"
min_width
"
]
==
"
max
"
):
dataset
.
params
[
"
config
"
][
"
padding
"
][
"
min_width
"
]
=
max
(
[
s
[
"
img
"
].
shape
[
1
]
for
s
in
self
.
train_dataset
.
samples
]
)
class
OCRDataset
(
GenericDataset
):
class
OCRDataset
(
GenericDataset
):
...
@@ -116,34 +101,6 @@ class OCRDataset(GenericDataset):
...
@@ -116,34 +101,6 @@ class OCRDataset(GenericDataset):
[
0
,
sample
[
"
img
"
].
shape
[
0
]],
[
0
,
sample
[
"
img
"
].
shape
[
0
]],
[
0
,
sample
[
"
img
"
].
shape
[
1
]],
[
0
,
sample
[
"
img
"
].
shape
[
1
]],
]
]
# Padding constraints to handle model needs
if
"
padding
"
in
self
.
params
[
"
config
"
]
and
self
.
params
[
"
config
"
][
"
padding
"
]:
if
(
self
.
set_name
==
"
train
"
or
not
self
.
params
[
"
config
"
][
"
padding
"
][
"
train_only
"
]
):
min_pad
=
self
.
params
[
"
config
"
][
"
padding
"
][
"
min_pad
"
]
max_pad
=
self
.
params
[
"
config
"
][
"
padding
"
][
"
max_pad
"
]
pad_width
=
(
randint
(
min_pad
,
max_pad
,
(
1
,))
if
min_pad
is
not
None
and
max_pad
is
not
None
else
None
)
pad_height
=
(
randint
(
min_pad
,
max_pad
,
(
1
,))
if
min_pad
is
not
None
and
max_pad
is
not
None
else
None
)
sample
[
"
img
"
],
sample
[
"
img_position
"
]
=
pad_image
(
sample
[
"
img
"
],
new_width
=
self
.
params
[
"
config
"
][
"
padding
"
][
"
min_width
"
],
new_height
=
self
.
params
[
"
config
"
][
"
padding
"
][
"
min_height
"
],
pad_width
=
pad_width
,
pad_height
=
pad_height
,
padding_mode
=
self
.
params
[
"
config
"
][
"
padding
"
][
"
mode
"
],
return_position
=
True
,
)
return
sample
return
sample
def
convert_labels
(
self
):
def
convert_labels
(
self
):
...
@@ -178,11 +135,8 @@ class OCRCollateFunction:
...
@@ -178,11 +135,8 @@ class OCRCollateFunction:
labels
=
pad_sequences_1D
(
labels
,
padding_value
=
self
.
label_padding_value
)
labels
=
pad_sequences_1D
(
labels
,
padding_value
=
self
.
label_padding_value
)
labels
=
torch
.
tensor
(
labels
).
long
()
labels
=
torch
.
tensor
(
labels
).
long
()
padding_mode
=
(
self
.
config
[
"
padding_mode
"
]
if
"
padding_mode
"
in
self
.
config
else
"
br
"
)
imgs
=
[
batch_data
[
i
][
"
img
"
]
for
i
in
range
(
len
(
batch_data
))]
imgs
=
[
batch_data
[
i
][
"
img
"
]
for
i
in
range
(
len
(
batch_data
))]
imgs
=
pad_images
(
imgs
,
padding_mode
=
padding_mode
)
imgs
=
pad_images
(
imgs
)
imgs
=
torch
.
tensor
(
imgs
).
float
().
permute
(
0
,
3
,
1
,
2
)
imgs
=
torch
.
tensor
(
imgs
).
float
().
permute
(
0
,
3
,
1
,
2
)
formatted_batch_data
=
{
formatted_batch_data
=
{
...
...
This diff is collapsed.
Click to expand it.
dan/utils.py
+
17
−
85
View file @
f57bc667
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
import
cv2
import
cv2
import
numpy
as
np
import
numpy
as
np
from
torch
import
randint
# Layout begin-token to end-token
# Layout begin-token to end-token
SEM_MATCHING_TOKENS
=
{
"
ⓘ
"
:
"
Ⓘ
"
,
"
ⓓ
"
:
"
Ⓓ
"
,
"
ⓢ
"
:
"
Ⓢ
"
,
"
ⓒ
"
:
"
Ⓒ
"
,
"
ⓟ
"
:
"
Ⓟ
"
,
"
ⓐ
"
:
"
Ⓐ
"
}
SEM_MATCHING_TOKENS
=
{
"
ⓘ
"
:
"
Ⓘ
"
,
"
ⓓ
"
:
"
Ⓓ
"
,
"
ⓢ
"
:
"
Ⓢ
"
,
"
ⓒ
"
:
"
Ⓒ
"
,
"
ⓟ
"
:
"
Ⓟ
"
,
"
ⓐ
"
:
"
Ⓐ
"
}
...
@@ -25,89 +24,29 @@ def pad_sequences_1D(data, padding_value):
...
@@ -25,89 +24,29 @@ def pad_sequences_1D(data, padding_value):
return
padded_data
return
padded_data
def
pad_images
(
data
,
padding_mode
=
"
br
"
):
def
pad_images
(
data
):
"""
"""
data: list of numpy array
Pad the images so that they are in the middle of the large padded image (tb-lr mode).
mode:
"
br
"
/
"
tl
"
/
"
random
"
(bottom-right, top-left, random)
:param data: List of numpy arrays.
:return padded_data: A tensor containing all the padded images.
"""
"""
x_lengths
=
[
x
.
shape
[
0
]
for
x
in
data
]
longest_x
=
max
([
x
.
shape
[
0
]
for
x
in
data
])
y_lengths
=
[
x
.
shape
[
1
]
for
x
in
data
]
longest_y
=
max
([
x
.
shape
[
1
]
for
x
in
data
])
longest_x
=
max
(
x_lengths
)
longest_y
=
max
(
y_lengths
)
padded_data
=
np
.
zeros
((
len
(
data
),
longest_x
,
longest_y
,
data
[
0
].
shape
[
2
]))
padded_data
=
np
.
zeros
((
len
(
data
),
longest_x
,
longest_y
,
data
[
0
].
shape
[
2
]))
for
i
,
xy_len
in
enumerate
(
zip
(
x_lengths
,
y_lengths
)):
for
index
,
image
in
enumerate
(
data
):
x_len
,
y_len
=
xy_len
delta_x
=
longest_x
-
image
.
shape
[
0
]
if
padding_mode
==
"
br
"
:
delta_y
=
longest_y
-
image
.
shape
[
1
]
padded_data
[
i
,
:
x_len
,
:
y_len
,
...]
=
data
[
i
]
top
,
bottom
=
delta_x
//
2
,
delta_x
-
(
delta_x
//
2
)
elif
padding_mode
==
"
tl
"
:
left
,
right
=
delta_y
//
2
,
delta_y
-
(
delta_y
//
2
)
padded_data
[
i
,
-
x_len
:,
-
y_len
:,
...]
=
data
[
i
]
padded_data
[
elif
padding_mode
==
"
random
"
:
index
,
xmax
=
longest_x
-
x_len
top
:
padded_data
.
shape
[
1
]
-
bottom
,
ymax
=
longest_y
-
y_len
left
:
padded_data
.
shape
[
2
]
-
right
,
xi
=
randint
(
0
,
xmax
,
(
1
,))
if
xmax
>=
1
else
0
:,
yi
=
randint
(
0
,
ymax
,
(
1
,))
if
ymax
>=
1
else
0
]
=
image
padded_data
[
i
,
xi
:
xi
+
x_len
,
yi
:
yi
+
y_len
,
...]
=
data
[
i
]
else
:
raise
NotImplementedError
(
"
Undefined padding mode: {}
"
.
format
(
padding_mode
))
return
padded_data
return
padded_data
def
pad_image
(
image
,
new_height
=
None
,
new_width
=
None
,
pad_width
=
None
,
pad_height
=
None
,
padding_mode
=
"
br
"
,
return_position
=
False
,
):
"""
data: list of numpy array
mode:
"
br
"
/
"
tl
"
/
"
random
"
(bottom-right, top-left, random)
"""
if
pad_width
is
not
None
and
new_width
is
not
None
:
raise
NotImplementedError
(
"
pad_with and new_width are not compatible
"
)
if
pad_height
is
not
None
and
new_height
is
not
None
:
raise
NotImplementedError
(
"
pad_height and new_height are not compatible
"
)
h
,
w
,
c
=
image
.
shape
pad_width
=
(
pad_width
if
pad_width
is
not
None
else
max
(
0
,
new_width
-
w
)
if
new_width
is
not
None
else
0
)
pad_height
=
(
pad_height
if
pad_height
is
not
None
else
max
(
0
,
new_height
-
h
)
if
new_height
is
not
None
else
0
)
if
not
(
pad_width
==
0
and
pad_height
==
0
):
padded_image
=
np
.
zeros
((
h
+
pad_height
,
w
+
pad_width
,
c
))
if
padding_mode
==
"
br
"
:
hi
,
wi
=
0
,
0
elif
padding_mode
==
"
tl
"
:
hi
,
wi
=
pad_height
,
pad_width
elif
padding_mode
==
"
random
"
:
hi
=
randint
(
0
,
pad_height
,
(
1
,))
if
pad_height
>=
1
else
0
wi
=
randint
(
0
,
pad_width
,
(
1
,))
if
pad_width
>=
1
else
0
else
:
raise
NotImplementedError
(
"
Undefined padding mode: {}
"
.
format
(
padding_mode
))
padded_image
[
hi
:
hi
+
h
,
wi
:
wi
+
w
,
...]
=
image
output
=
padded_image
else
:
hi
,
wi
=
0
,
0
output
=
image
if
return_position
:
return
output
,
[[
hi
,
hi
+
h
],
[
wi
,
wi
+
w
]]
return
output
def
read_image
(
filename
,
scale
=
1.0
):
def
read_image
(
filename
,
scale
=
1.0
):
"""
"""
Read image and rescale it
Read image and rescale it
...
@@ -122,13 +61,6 @@ def read_image(filename, scale=1.0):
...
@@ -122,13 +61,6 @@ def read_image(filename, scale=1.0):
return
image
return
image
def
round_floats
(
float_list
,
decimals
=
2
):
"""
Round list of floats with fixed decimals
"""
return
[
np
.
around
(
num
,
decimals
)
for
num
in
float_list
]
# Charset / labels conversion
# Charset / labels conversion
def
token_to_ind
(
labels
,
str
):
def
token_to_ind
(
labels
,
str
):
return
[
labels
.
index
(
c
)
for
c
in
str
]
return
[
labels
.
index
(
c
)
for
c
in
str
]
...
...
This diff is collapsed.
Click to expand it.
Mélodie Boillet
@mboillet
mentioned in merge request
!224 (merged)
·
1 year ago
mentioned in merge request
!224 (merged)
mentioned in merge request !224
Toggle commit list
Mélodie Boillet
@mboillet
mentioned in issue
#136
·
1 year ago
mentioned in issue
#136
mentioned in issue #136
Toggle commit list
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment