Skip to content
Snippets Groups Projects

Support subword and word language models

Merged — Solene Tarride requested to merge subword-and-word-lm into main
All threads resolved!
1 file
+ 68
0
Compare changes
  • Side-by-side
  • Inline
+ 68
0
@@ -545,27 +545,38 @@ def test_run_prediction_batch(
{
"text": "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241",
"language_model": {
<<<<<<< HEAD
<<<<<<< HEAD
"text": "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241",
"confidence": 0.92,
=======
"text": "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241"
>>>>>>> c80c413 (Write tests for LM decoding)
=======
"text": "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241",
"confidence": 0.9226743371961854,
>>>>>>> e1ebd55 (Fix tests)
},
},
{
"text": "ⓈTemplié ⒻMarcelle Ⓑ93 ⓁS Ⓚch ⓄE dactylo Ⓟ18376",
"language_model": {
<<<<<<< HEAD
<<<<<<< HEAD
"text": "ⓈTemplié ⒻMarcelle Ⓑ93 ⓁS Ⓚch ⓄE dactylo Ⓟ18376",
"confidence": 0.88,
=======
"text": "ⓈTemplié ⒻMarcelle Ⓑ93 ⓁS Ⓚch ⓄE dactylo Ⓟ18376"
>>>>>>> c80c413 (Write tests for LM decoding)
=======
"text": "ⓈTemplié ⒻMarcelle Ⓑ93 ⓁS Ⓚch ⓄE dactylo Ⓟ18376",
"confidence": 0.8759829104754289,
>>>>>>> e1ebd55 (Fix tests)
},
},
{
"text": "Ⓢd ⒻCharles Ⓑ11 ⓁP ⒸC ⓀF Ⓞd Ⓟ14 31",
<<<<<<< HEAD
<<<<<<< HEAD
"language_model": {
"text": "Ⓢd ⒻCharles Ⓑ11 ⓁP ⒸC ⓀF Ⓞd Ⓟ14 31",
@@ -585,6 +596,19 @@ def test_run_prediction_batch(
"text": "ⓈNaudin ⒻMarie Ⓑ53 ⓁS Ⓒv ⓀBelle mère",
"language_model": {"text": "ⓈNaudin ⒻMarie Ⓑ53 ⓁS Ⓒv ⓀBelle mère"},
>>>>>>> c80c413 (Write tests for LM decoding)
=======
"language_model": {
"text": "Ⓢd ⒻCharles Ⓑ11 ⓁP ⒸC ⓀF Ⓞd Ⓟ14 31",
"confidence": 0.864021797502254,
},
},
{
"text": "ⓈNaudin ⒻMarie Ⓑ53 ⓁS Ⓒv ⓀBelle mère",
"language_model": {
"text": "ⓈNaudin ⒻMarie Ⓑ53 ⓁS Ⓒv ⓀBelle mère",
"confidence": 0.8903665579889012,
},
>>>>>>> e1ebd55 (Fix tests)
},
],
),
@@ -600,27 +624,38 @@ def test_run_prediction_batch(
{
"text": "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241",
"language_model": {
<<<<<<< HEAD
<<<<<<< HEAD
"text": "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241",
"confidence": 0.90,
=======
"text": "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241"
>>>>>>> c80c413 (Write tests for LM decoding)
=======
"text": "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241",
"confidence": 0.8982517863786614,
>>>>>>> e1ebd55 (Fix tests)
},
},
{
"text": "ⓈTemplié ⒻMarcelle Ⓑ93 ⓁS Ⓚch ⓄE dactylo Ⓟ18376",
"language_model": {
<<<<<<< HEAD
<<<<<<< HEAD
"text": "ⓈTemplié ⒻMarcelle Ⓑ93 ⓁS Ⓚch ⓄE dactylo Ⓟ18376",
"confidence": 0.84,
=======
"text": "ⓈTemplié ⒻMarcelle Ⓑ93 ⓁS Ⓚch ⓄE dactylo Ⓟ18376"
>>>>>>> c80c413 (Write tests for LM decoding)
=======
"text": "ⓈTemplié ⒻMarcelle Ⓑ93 ⓁS Ⓚch ⓄE dactylo Ⓟ18376",
"confidence": 0.8386571587822831,
>>>>>>> e1ebd55 (Fix tests)
},
},
{
"text": "Ⓢd ⒻCharles Ⓑ11 ⓁP ⒸC ⓀF Ⓞd Ⓟ14 31",
<<<<<<< HEAD
<<<<<<< HEAD
"language_model": {
"text": "Ⓢd ⒻCharles Ⓑ11 ⓁP ⒸC ⓀF Ⓞd Ⓟ14331",
@@ -640,6 +675,19 @@ def test_run_prediction_batch(
"text": "ⓈNaudin ⒻMarie Ⓑ53 ⓁS Ⓒv ⓀBelle mère",
"language_model": {"text": "ⓈNaudin ⒻMarie Ⓑ53 ⓁS Ⓒv ⓀBelle mère"},
>>>>>>> c80c413 (Write tests for LM decoding)
=======
"language_model": {
"text": "Ⓢd ⒻCharles Ⓑ11 ⓁP ⒸC ⓀF Ⓞd Ⓟ14331",
"confidence": 0.8334836549049839,
},
},
{
"text": "ⓈNaudin ⒻMarie Ⓑ53 ⓁS Ⓒv ⓀBelle mère",
"language_model": {
"text": "ⓈNaudin ⒻMarie Ⓑ53 ⓁS Ⓒv ⓀBelle mère",
"confidence": 0.8565623750166133,
},
>>>>>>> e1ebd55 (Fix tests)
},
],
),
@@ -661,17 +709,23 @@ def test_run_prediction_batch(
),
)
<<<<<<< HEAD
<<<<<<< HEAD
=======
@pytest.mark.parametrize("batch_size", [1, 2])
>>>>>>> c80c413 (Write tests for LM decoding)
=======
>>>>>>> e1ebd55 (Fix tests)
def test_run_prediction_language_model(
image_names,
language_model_weight,
expected_predictions,
<<<<<<< HEAD
<<<<<<< HEAD
=======
batch_size,
>>>>>>> c80c413 (Write tests for LM decoding)
=======
>>>>>>> e1ebd55 (Fix tests)
tmp_path,
):
# Make tmpdir and copy needed images inside
@@ -713,11 +767,15 @@ def test_run_prediction_language_model(
max_object_height=None,
image_extension=".png",
gpu_device=None,
<<<<<<< HEAD
<<<<<<< HEAD
batch_size=1,
=======
batch_size=batch_size,
>>>>>>> c80c413 (Write tests for LM decoding)
=======
batch_size=1,
>>>>>>> e1ebd55 (Fix tests)
tokens=parse_tokens(PREDICTION_DATA_PATH / "tokens.yml"),
start_token=None,
use_language_model=True,
@@ -730,14 +788,24 @@ def test_run_prediction_language_model(
assert prediction["text"] == expected_prediction["text"]
if language_model_weight > 0:
print(
prediction["language_model"]["text"],
prediction["language_model"]["confidence"],
)
assert (
prediction["language_model"]["text"]
== expected_prediction["language_model"]["text"]
)
<<<<<<< HEAD
<<<<<<< HEAD
=======
>>>>>>> e1ebd55 (Fix tests)
assert np.isclose(
prediction["language_model"]["confidence"],
expected_prediction["language_model"]["confidence"],
)
<<<<<<< HEAD
=======
>>>>>>> c80c413 (Write tests for LM decoding)
=======
>>>>>>> e1ebd55 (Fix tests)
Loading