Skip to content
Snippets Groups Projects
Commit 96327069 authored by Solene Tarride's avatar Solene Tarride
Browse files

add arguments for word and line separators

parent db79c0e1
No related branches found
Tags 1.5.2-rc5
1 merge request: !66 "Compute confidence scores by char, word or line"
This commit is part of merge request !66. Comments created here will be created in the context of that merge request.
......@@ -63,7 +63,7 @@ def add_predict_parser(subcommands) -> None:
)
parser.add_argument(
"--confidence-score-levels",
default=[],
default="",
type=str,
nargs="+",
help="Levels of confidence scores. Should be a list of any combinaison of ['char', 'word', 'line'].",
......@@ -90,5 +90,20 @@ def add_predict_parser(subcommands) -> None:
help="Image scaling factor before creating the GIF",
required=False,
)
parser.add_argument(
"--word-separators",
default=[" ", "\n"],
type=str,
nargs="+",
help="String separators used to split text into words.",
required=False,
)
parser.add_argument(
"--line-separators",
default=["\n"],
type=str,
nargs="+",
help="String separators used to split text into lines.",
required=False,
)
parser.set_defaults(func=run)
......@@ -6,7 +6,7 @@ from PIL import Image
from dan import logger
def split_text(text, level):
def split_text(text, level, word_separators, line_separators):
"""
Split text into a list of characters, word, or lines.
:param text: Text prediction from DAN
......@@ -18,19 +18,33 @@ def split_text(text, level):
offset = 0
# split into words
elif level == "word":
text = text.replace("\n", " ")
text_split = text.split(" ")
main_sep = word_separators[0]
for other_sep in word_separators[1:]:
text = text.replace(other_sep, main_sep)
text_split = text.split(main_sep)
offset = 1
# split into lines
elif level == "line":
text_split = text.split("\n")
main_sep = line_separators[0]
for other_sep in line_separators[1:]:
text = text.replace(other_sep, main_sep)
text_split = text.split(main_sep)
offset = 1
else:
logger.error("Level should be either 'char', 'word', or 'line'")
return text_split, offset
def plot_attention(image, text, weights, level, scale, outname):
def plot_attention(
image,
text,
weights,
level,
scale,
outname,
word_separators=["\n", " "],
line_separators=["\n"],
):
"""
Create a gif by blending attention maps to the image for each text piece (char, word or line)
:param image: Input image in PIL format
......@@ -48,7 +62,7 @@ def plot_attention(image, text, weights, level, scale, outname):
image = Image.fromarray(image)
# Split text into characters, words or lines
text_list, offset = split_text(text, level)
text_list, offset = split_text(text, level, word_separators, line_separators)
# Iterate on characters, words or lines
tot_len = 0
......
......@@ -238,6 +238,8 @@ def run(
attention_map,
attention_map_level,
attention_map_scale,
word_separators,
line_separators,
):
# Create output directory if necessary
if not os.path.exists(output):
......@@ -274,10 +276,14 @@ def run(
char_confidences = prediction["confidences"][0]
result["confidences"] = {"total": np.around(np.mean(char_confidences), 2)}
if "word" in confidence_score_levels:
word_probs = compute_prob_by_separator(text, char_confidences, ["\n", " "])
word_probs = compute_prob_by_separator(
text, char_confidences, word_separators
)
result["confidences"].update({"word": round_floats(word_probs)})
if "line" in confidence_score_levels:
line_probs = compute_prob_by_separator(text, char_confidences, ["\n"])
line_probs = compute_prob_by_separator(
text, char_confidences, line_separators
)
result["confidences"].update({"line": round_floats(line_probs)})
if "char" in confidence_score_levels:
result["confidences"].update({"char": round_floats(char_confidences)})
......@@ -292,6 +298,8 @@ def run(
weights=prediction["attentions"][0],
level=attention_map_level,
scale=attention_map_scale,
word_separators=word_separators,
line_separators=line_separators,
outname=gif_filename,
)
result["attention_gif"] = gif_filename
......
Loading…
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment