Support subword and word language models
parent
095667f4
No related branches found
No related tags found
Showing
- .gitattributes 1 addition, 0 deletions.gitattributes
- dan/datasets/extract/__init__.py 7 additions, 0 deletionsdan/datasets/extract/__init__.py
- dan/datasets/extract/arkindex.py 52 additions, 26 deletionsdan/datasets/extract/arkindex.py
- dan/datasets/extract/utils.py 123 additions, 2 deletionsdan/datasets/extract/utils.py
- dan/utils.py 6 additions, 2 deletionsdan/utils.py
- docs/get_started/training.md 14 additions, 6 deletionsdocs/get_started/training.md
- docs/usage/datasets/extract.md 2 additions, 1 deletiondocs/usage/datasets/extract.md
- docs/usage/predict/index.md 97 additions, 12 deletionsdocs/usage/predict/index.md
- docs/usage/train/language_model.md 32 additions, 4 deletionsdocs/usage/train/language_model.md
- requirements.txt 2 additions, 0 deletionsrequirements.txt
- tests/data/prediction/language_lexicon.txt 1 addition, 1 deletiontests/data/prediction/language_lexicon.txt
- tests/data/prediction/language_model.arpa 3 additions, 118388 deletionstests/data/prediction/language_model.arpa
- tests/data/prediction/language_tokens.txt 1 addition, 1 deletiontests/data/prediction/language_tokens.txt
- tests/test_extract.py 199 additions, 25 deletionstests/test_extract.py
... | @@ -6,10 +6,12 @@ flashlight-text==0.0.4 | ... | @@ -6,10 +6,12 @@ flashlight-text==0.0.4 |
imageio==2.26.1 | imageio==2.26.1 | ||
imagesize==1.4.1 | imagesize==1.4.1 | ||
mdutils==1.6.0 | mdutils==1.6.0 | ||
nltk==3.8.1 | |||
numpy==1.24.3 | numpy==1.24.3 | ||
prettytable==3.8.0 | prettytable==3.8.0 | ||
PyYAML==6.0 | PyYAML==6.0 | ||
scipy==1.10.1 | scipy==1.10.1 | ||
sentencepiece==0.1.99 | |||
teklia-line-image-extractor==0.2.8rc5 | teklia-line-image-extractor==0.2.8rc5 | ||
tenacity==8.2.3 | tenacity==8.2.3 | ||
tensorboard==2.12.2 | tensorboard==2.12.2 | ||
... | ... |
This diff is collapsed.
Please register or sign in to comment