diff --git a/tests/test_analyze.py b/tests/test_analyze.py index 5f07106a5b7a705c17ce984ceb5041d2a6b31895..82d18fe1608df485f4d50688b423465443d40d0b 100644 --- a/tests/test_analyze.py +++ b/tests/test_analyze.py @@ -28,80 +28,53 @@ def full_statistics(): return MdUtils(file_name="").read_md_file(str(FIXTURES / "analyze" / "stats")) -@pytest.mark.parametrize( - "im_paths, expected_summary", - ( - ( - [ - "tests/data/training/training_dataset/images/0a34e13a-4ab0-4a91-8d7c-b1d8fee32628.png", - "tests/data/training/training_dataset/images/0a70e14f-feda-4607-989c-36cf581ddff5.png", - "tests/data/training/training_dataset/images/0a576062-303c-4893-a729-c09c92865d31.png", - "tests/data/training/training_dataset/images/0b2457c8-81f1-4600-84d9-f8bf2822a991.png", - "tests/data/training/training_dataset/images/fb3edb59-3678-49f8-8e16-8e32e3b0f051.png", - "tests/data/training/training_dataset/images/fe498de2-ece4-4fbe-8b53-edfce1b820f0.png", - ], - pytest.lazy_fixture("image_statistics"), - ), - ), -) -def test_display_image_statistics(im_paths, expected_summary, tmp_path): +def test_display_image_statistics(image_statistics, tmp_path): stats = Statistics(filename=tmp_path) - stats.create_image_statistics(images=im_paths) - assert stats.document.get_md_text() == expected_summary - - -@pytest.mark.parametrize( - "texts, expected_summary", - ( - ( - [ - "Teklia’s expertise is to develop document analysis\nand processing solutions using, among other things,\nOCR technology.", - "Our software combines image analysis, printed and\nhandwritten text recognition, text segmentation with\na document classification and indexation system.", - "Our objective is to deliver to our clients an automated\ndocument processing tool easy-to-use and adapted\nto their needs.", - "With the same state of mind, we developed additional solutions to\nenhance both security and business-process.", - ], - pytest.lazy_fixture("labels_statistics"), - ), - ), -) -def test_display_label_statistics(texts, expected_summary, tmp_path): + stats.create_image_statistics( + images=[ + "tests/data/training/training_dataset/images/0a34e13a-4ab0-4a91-8d7c-b1d8fee32628.png", + "tests/data/training/training_dataset/images/0a70e14f-feda-4607-989c-36cf581ddff5.png", + "tests/data/training/training_dataset/images/0a576062-303c-4893-a729-c09c92865d31.png", + "tests/data/training/training_dataset/images/0b2457c8-81f1-4600-84d9-f8bf2822a991.png", + "tests/data/training/training_dataset/images/fb3edb59-3678-49f8-8e16-8e32e3b0f051.png", + "tests/data/training/training_dataset/images/fe498de2-ece4-4fbe-8b53-edfce1b820f0.png", + ] + ) + assert stats.document.get_md_text() == image_statistics + + +def test_display_label_statistics(labels_statistics, tmp_path): filename = tmp_path / "labels.md" stats = Statistics(filename=str(filename)) - stats.create_label_statistics(labels=texts) - assert stats.document.get_md_text() == expected_summary - - -@pytest.mark.parametrize( - "texts, expected_summary", - ( - ( - [ - "ⓈDayon ⒻFernand Ⓐ6\nⓈDayen ⒻMaurice Ⓐ2\nⓈTottelier ⒻJean Baptiste Ⓐ59", - "ⓈPeryro ⒻEtienne Ⓐ33\nⓈJeannot ⒻCaroline Ⓐ24\nⓈMouline ⒻPierre Ⓐ32", - ], - pytest.lazy_fixture("ner_statistics"), - ), - ), -) -def test_display_ner_statistics(texts, expected_summary, tmp_path): + stats.create_label_statistics( + labels=[ + "Teklia’s expertise is to develop document analysis\nand processing solutions using, among other things,\nOCR technology.", + "Our software combines image analysis, printed and\nhandwritten text recognition, text segmentation with\na document classification and indexation system.", + "Our objective is to deliver to our clients an automated\ndocument processing tool easy-to-use and adapted\nto their needs.", + "With the same state of mind, we developed additional solutions to\nenhance both security and business-process.", + ] + ) + assert stats.document.get_md_text() == labels_statistics + + +def test_display_ner_statistics(ner_statistics, tmp_path): tokens = read_yaml(FIXTURES / "training" / "training_dataset" / "tokens.yaml") stats = Statistics(filename=tmp_path) - stats.create_ner_statistics(labels=texts, ner_tokens=tokens) - assert stats.document.get_md_text() == expected_summary - - -@pytest.mark.parametrize( - "labels, tokens, expected_summary", - ( - ( - FIXTURES / "training" / "training_dataset" / "labels.json", - FIXTURES / "training" / "training_dataset" / "tokens.yaml", - pytest.lazy_fixture("full_statistics"), - ), - ), -) -def test_run(labels, tokens, expected_summary, tmp_path): + stats.create_ner_statistics( + labels=[ + "ⓈDayon ⒻFernand Ⓐ6\nⓈDayen ⒻMaurice Ⓐ2\nⓈTottelier ⒻJean Baptiste Ⓐ59", + "ⓈPeryro ⒻEtienne Ⓐ33\nⓈJeannot ⒻCaroline Ⓐ24\nⓈMouline ⒻPierre Ⓐ32", + ], + ner_tokens=tokens, + ) + assert stats.document.get_md_text() == ner_statistics + + +def test_run(full_statistics, tmp_path): output_file = tmp_path / "stats.md" stats = Statistics(filename=str(output_file)) - stats.run(labels=read_json(labels), tokens=read_yaml(tokens)) - assert output_file.read_text() == expected_summary + stats.run( + labels=read_json(FIXTURES / "training" / "training_dataset" / "labels.json"), + tokens=read_yaml(FIXTURES / "training" / "training_dataset" / "tokens.yaml"), + ) + assert output_file.read_text() == full_statistics diff --git a/tox.ini b/tox.ini index 38b064f0bf444ebeb4be30ed81c69151adc8aa4e..ffb845de95a7750ccdf7d1fe5e3ecfde4bc8cd22 100644 --- a/tox.ini +++ b/tox.ini @@ -8,9 +8,7 @@ description = run the tests with pytest package = wheel wheel_build_env = .pkg deps = - # TODO: Remove this once https://gitlab.teklia.com/atr/dan/-/issues/258 is done - pytest<8,>=6 - pytest-lazy-fixture + pytest>=6 pytest-responses -rrequirements.txt commands =