From 0f7789f131735197685db0280309758f71d3be90 Mon Sep 17 00:00:00 2001
From: Bastien Abadie <abadie@teklia.com>
Date: Wed, 28 Aug 2024 09:46:19 +0000
Subject: [PATCH] Import model pylaia_rimes from pylaia

---
 pylaia/rimes/README.md              |  21 ++++++
 pylaia/rimes/language_model.arpa.gz |   3 +
 pylaia/rimes/lexicon.txt            | 100 ++++++++++++++++++++++++++++
 pylaia/rimes/model                  | Bin 0 -> 1515 bytes
 pylaia/rimes/syms.txt               | 100 ++++++++++++++++++++++++++++
 pylaia/rimes/tokens.txt             | 100 ++++++++++++++++++++++++++++
 pylaia/rimes/weights.ckpt           |   3 +
 7 files changed, 327 insertions(+)
 create mode 100644 pylaia/rimes/README.md
 create mode 100644 pylaia/rimes/language_model.arpa.gz
 create mode 100644 pylaia/rimes/lexicon.txt
 create mode 100644 pylaia/rimes/model
 create mode 100644 pylaia/rimes/syms.txt
 create mode 100644 pylaia/rimes/tokens.txt
 create mode 100644 pylaia/rimes/weights.ckpt

diff --git a/pylaia/rimes/README.md b/pylaia/rimes/README.md
new file mode 100644
index 0000000..4b87701
--- /dev/null
+++ b/pylaia/rimes/README.md
@@ -0,0 +1,21 @@
+# PyLaia Rimes
+
+## Datasets
+
+Trained on text-lines from the [Rimes 2011 dataset](https://teklia.com/research/rimes-database/).
+
+| split  | N lines |
+|--------|--------:|
+| train  | 10,188  |
+| val    |  1,138  |
+| test   |    778  |
+
+## Results
+
+* Fixed line height: 128 pixels
+* Language model: 6-gram character model trained on the training set with KenLM
+
+| Model                          | val CER | test CER | val WER | test WER |
+|:-------------------------------|--------:|---------:|--------:|---------:|
+| Model without LM               |    4.55 |     4.53 |   14.39 |    15.06 |
+| Model with LM (`weight = 1.5`) |    3.68 |     3.47 |   10.01 |    10.20 |
diff --git a/pylaia/rimes/language_model.arpa.gz b/pylaia/rimes/language_model.arpa.gz
new file mode 100644
index 0000000..fae3587
--- /dev/null
+++ b/pylaia/rimes/language_model.arpa.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:119d9433498a3c17f934be86db2dbc7794cc5ff3861c8d0a4fa8e197f10658d5
+size 5140408
diff --git a/pylaia/rimes/lexicon.txt b/pylaia/rimes/lexicon.txt
new file mode 100644
index 0000000..b33b289
--- /dev/null
+++ b/pylaia/rimes/lexicon.txt
@@ -0,0 +1,100 @@
+<ctc> <ctc>
+! !
+" "
+% %
+' '
+( (
+) )
+, ,
+- -
+. .
+/ /
+0 0
+1 1
+2 2
+3 3
+4 4
+5 5
+6 6
+7 7
+8 8
+9 9
+: :
+; ;
+= =
+? ?
+A A
+B B
+C C
+D D
+E E
+F F
+G G
+H H
+I I
+J J
+K K
+L L
+M M
+N N
+O O
+P P
+Q Q
+R R
+S S
+T T
+U U
+V V
+W W
+X X
+Y Y
+Z Z
+_ _
+a a
+b b
+c c
+d d
+e e
+f f
+g g
+h h
+i i
+j j
+k k
+l l
+m m
+n n
+o o
+p p
+q q
+r r
+s s
+t t
+u u
+v v
+w w
+x x
+y y
+z z
+{ {
+} }
+¤ ¤
+° °
+² ²
+À À
+É É
+à à
+â â
+ç ç
+è è
+é é
+ê ê
+ë ë
+î î
+ô ô
+ù ù
+û û
+Å“ Å“
+€ €
+<unk> <unk>
+<space> <space>
diff --git a/pylaia/rimes/model b/pylaia/rimes/model
new file mode 100644
index 0000000000000000000000000000000000000000..d4cd1bf7472e430655c1a7be353b957b0f99af95
GIT binary patch
literal 1515
zcmZ`(e|Hl_6y2sYZI`x`3JRiC1ZAsj$`1ua5p1<kHw~dsUD0)&ZidW~&1~-eDAsd0
zI)8*?zl2}O<2Uf5_+~Su4dCSD?96@l-FY+f-fOy}M=Wb%!g^`USSPG*(&F)Qi{-4=
z>%?m-v)02G#vhfZupF;S?nEwPjuG!PEn*h2TIrtEM4s04ItWuKQSoR)OR_G<sHgX>
zlWBFc(ZG??!qRqzv4v8y^Q2r=pJLqc=CoVU4I<eq@*osUN*-sJ2p?yOBwmlD8L#EL
zy-2q0W|BpDw8PhBZe!9N>BCky>P?fz4o~waWO0!5Uvo_Dd9wtDQU+T55odXk@(j~H
zW*jl;PW5q2VU}j6`Z%sIN3)B4yrytsFgvO6I`@tljSf#KsvwJg<9NfzDaXfYEi>B3
zn+j*d8SdjP1H7#;FXjn&#{lmtoE2vYc+UXm6wZtDoO`yB&+;^CalG&215&=AX>>G4
zVL>dA--ibHNMTVd60l@|n!>VJCg7q0E-73Vmq~fbD7T`R<xwJW#mC2_{fQx7Rk$Xu
zk>{rd_)OutxK6<52KYkZhPXk%mj<}0a7)}G?HQxZLVAjU`d2<yoG?$)P}HQ<8A3Ue
z)eabTowLsFCa-VVSQX{(Ajn>l#O2j~?c*B?a?N?=<2Mr7;BDH_y#H;t(#IWzb+Jxi
ze(Q)5a24vJPQZo+tUekFcg0--nuc*tVN-0<L5}WUQkwLVBFB9n4_>|XYMf-ewI!pR
zGLNS_EVi+Ao7~IM$9D?fi^G7Y@WT-BP~pcR;3oy2mzk@RberX&kV)Fb&pra@Pa52t
zB=sWWL6>En48QD^`9j4O#B7(R8Q35Lw-v&Yt4#&Pn4u*mH!MmauJEWlrZ%<AG20Z!
z&42ep7Yb2Xu$n?;#_{;&ha{U_zyDyvMyHgEqXW5kNUp0Oz2hXV%Z@Hp7j?|Gbx9{~
zt1PIf%i281I0`O9o}%ZCYkng6(xt0FQPi65_{8$FlbzqGr|5t4f7{u#RW<F5>OZ4I
zUt6hAH@jkMiGTiTZ&gTI|C{cJ{yguAhvjub50Bvv`J(S(UEO*P#@`g|z}JM+bgR`x
rW1k;gdd@6kq1FEPiLuO$4JPELFU8t3>k*na-7(#e$){pz{($=*Yj~Q^

literal 0
HcmV?d00001

diff --git a/pylaia/rimes/syms.txt b/pylaia/rimes/syms.txt
new file mode 100644
index 0000000..bb2efde
--- /dev/null
+++ b/pylaia/rimes/syms.txt
@@ -0,0 +1,100 @@
+<ctc> 0
+! 1
+" 2
+% 3
+' 4
+( 5
+) 6
+, 7
+- 8
+. 9
+/ 10
+0 11
+1 12
+2 13
+3 14
+4 15
+5 16
+6 17
+7 18
+8 19
+9 20
+: 21
+; 22
+= 23
+? 24
+A 25
+B 26
+C 27
+D 28
+E 29
+F 30
+G 31
+H 32
+I 33
+J 34
+K 35
+L 36
+M 37
+N 38
+O 39
+P 40
+Q 41
+R 42
+S 43
+T 44
+U 45
+V 46
+W 47
+X 48
+Y 49
+Z 50
+_ 51
+a 52
+b 53
+c 54
+d 55
+e 56
+f 57
+g 58
+h 59
+i 60
+j 61
+k 62
+l 63
+m 64
+n 65
+o 66
+p 67
+q 68
+r 69
+s 70
+t 71
+u 72
+v 73
+w 74
+x 75
+y 76
+z 77
+{ 78
+} 79
+¤ 80
+° 81
+² 82
+À 83
+É 84
+à 85
+â 86
+ç 87
+è 88
+é 89
+ê 90
+ë 91
+î 92
+ô 93
+ù 94
+û 95
+Å“ 96
+€ 97
+<unk> 98
+<space> 99
diff --git a/pylaia/rimes/tokens.txt b/pylaia/rimes/tokens.txt
new file mode 100644
index 0000000..143fa1a
--- /dev/null
+++ b/pylaia/rimes/tokens.txt
@@ -0,0 +1,100 @@
+<ctc>
+!
+"
+%
+'
+(
+)
+,
+-
+.
+/
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+:
+;
+=
+?
+A
+B
+C
+D
+E
+F
+G
+H
+I
+J
+K
+L
+M
+N
+O
+P
+Q
+R
+S
+T
+U
+V
+W
+X
+Y
+Z
+_
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+{
+}
+¤
+°
+²
+À
+É
+à
+â
+ç
+è
+é
+ê
+ë
+î
+ô
+ù
+û
+Å“
+€
+<unk>
+<space>
diff --git a/pylaia/rimes/weights.ckpt b/pylaia/rimes/weights.ckpt
new file mode 100644
index 0000000..27cf064
--- /dev/null
+++ b/pylaia/rimes/weights.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b24aa78625c3c5cfbd07926aee3d5890dec2d7aab931a1c55bdbe6770236a8aa
+size 42750044
-- 
GitLab