From aa2867dcba3b81f538549f66249c917495e472bb Mon Sep 17 00:00:00 2001
From: Bastien Abadie <abadie@teklia.com>
Date: Wed, 28 Aug 2024 09:46:36 +0000
Subject: [PATCH] Import model pylaia_fraktur from pylaia

---
 pylaia/fraktur/README.md              |  16 +++
 pylaia/fraktur/language_model.arpa.gz |   3 +
 pylaia/fraktur/lexicon.txt            | 160 ++++++++++++++++++++++++++
 pylaia/fraktur/model                  | Bin 0 -> 1515 bytes
 pylaia/fraktur/syms.txt               | 160 ++++++++++++++++++++++++++
 pylaia/fraktur/tokens.txt             | 160 ++++++++++++++++++++++++++
 pylaia/fraktur/weights.ckpt           |   3 +
 7 files changed, 502 insertions(+)
 create mode 100644 pylaia/fraktur/README.md
 create mode 100644 pylaia/fraktur/language_model.arpa.gz
 create mode 100644 pylaia/fraktur/lexicon.txt
 create mode 100644 pylaia/fraktur/model
 create mode 100644 pylaia/fraktur/syms.txt
 create mode 100644 pylaia/fraktur/tokens.txt
 create mode 100644 pylaia/fraktur/weights.ckpt

diff --git a/pylaia/fraktur/README.md b/pylaia/fraktur/README.md
new file mode 100644
index 0000000..ee4c8bd
--- /dev/null
+++ b/pylaia/fraktur/README.md
@@ -0,0 +1,16 @@
+# PyLaia Austrian Newspaper (fraktur)
+
+## Datasets
+
+Trained on horizontal text-lines from the [Austrian Newspaper](https://demo.arkindex.org/browse/4dc4af87-20d0-4101-8ce9-6e427517c2b2?top_level=true&folder=true) corpus.
+
+## Results
+
+* Fixed line height of 128 pixels
+* LM = KenLM 6-gram character-level language model trained on the training set
+
+| Model      | Split   |   CER (%) |   WER (%) |   Support |
+|------------|---------|-----------|-----------|-----------|
+| PyLaia     | train   |      1.62 |      5.63 |     38891 |
+| PyLaia     | val     |      1.82 |      7.77 |      3282 |
+| PyLaia+LM  | val     |      1.77 |      7.01 |      3282 |
diff --git a/pylaia/fraktur/language_model.arpa.gz b/pylaia/fraktur/language_model.arpa.gz
new file mode 100644
index 0000000..d49869f
--- /dev/null
+++ b/pylaia/fraktur/language_model.arpa.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:22cc1a2cba477088a94908c5ab9ae9482a3641a87b6a7a79909b1ce8544c4d42
+size 28302572
diff --git a/pylaia/fraktur/lexicon.txt b/pylaia/fraktur/lexicon.txt
new file mode 100644
index 0000000..ad60e5b
--- /dev/null
+++ b/pylaia/fraktur/lexicon.txt
@@ -0,0 +1,160 @@
+<ctc> <ctc>
+! !
+" "
+# #
+% %
+& &
+' '
+( (
+) )
+* *
++ +
+, ,
+- -
+. .
+/ /
+0 0
+1 1
+2 2
+3 3
+4 4
+5 5
+6 6
+7 7
+8 8
+9 9
+: :
+; ;
+= =
+? ?
+A A
+B B
+C C
+D D
+E E
+F F
+G G
+H H
+I I
+J J
+K K
+L L
+M M
+N N
+O O
+P P
+Q Q
+R R
+S S
+T T
+U U
+V V
+W W
+X X
+Y Y
+Z Z
+[ [
+] ]
+_ _
+a a
+b b
+c c
+d d
+e e
+f f
+g g
+h h
+i i
+j j
+k k
+l l
+m m
+n n
+o o
+p p
+q q
+r r
+s s
+t t
+u u
+v v
+w w
+x x
+y y
+z z
+| |
+~ ~
+§ §
+« «
+¬ ¬
+° °
+± ±
+´ ´
+· ·
+» »
+¼ ¼
+½ ½
+¾ ¾
+Ä Ä
+È È
+É É
+Ê Ê
+Ó Ó
+Ô Ô
+Ö Ö
+Ü Ü
+ß ß
+à à
+á á
+â â
+ã ã
+ä ä
+æ æ
+ç ç
+è è
+é é
+ê ê
+ë ë
+ï ï
+ñ ñ
+ò ò
+ó ó
+ô ô
+ö ö
+û û
+ü ü
+ř ř
+Š Š
+Ž Ž
+ž ž
+˙ ˙
+· ·
+Δ Δ
+Ο Ο
+– –
+— —
+‘ ‘
+’ ’
+‚ ‚
+“ “
+” ”
+„ „
+† †
+• •
+… …
+⅓ ⅓
+⅕ ⅕
+⅙ ⅙
+⅛ ⅛
+⅜ ⅜
+⅝ ⅝
+⅞ ⅞
+∆ ∆
+≅ ≅
+═ ═
+▄ ▄
+□ □
+○ ○
+● ●
+<unk> <unk>
+<space> <space>
diff --git a/pylaia/fraktur/model b/pylaia/fraktur/model
new file mode 100644
index 0000000000000000000000000000000000000000..75fb6dd19d327331f8ad400d8e8f7d2fbbb4dd1c
GIT binary patch
literal 1515
zcmZ`(|8^5a6yByZZI`x`S`kF6C~B(hmLCd=BG_uNZW>~tx}xhk-3*x}o7sH(qgc;D
zoj-C8eF=|`;v0B;10TgZn<;GoCnslTzWd#8?#$eKo9^f_%bJ+5o*6UNywy!wJYH$B
zoH@NtyuLbX-Fs^MUU>wo=2gj^$VJRC;+>&I%p&HL?wKR<)Y0o8Or=D{qYW*|x*VgP
z-nUMswXH@2$4U!J+Zo0dOUcf|a#ekV@tQZM-HL7y$zG8Mp<q(-IKxEvAWJ0idMwSj
zlkfE+*|wWW7Uj_{-;lYDNq3|VTj97jO&YsA&7+XTLC$~9F}3f_5*SJuXz}};<weRf
zO#7IriAi^=j}r>BG&|kLNrgF@UFhQlh55nkMTJw`J7F|BJf*0DEc%7xB_F42KF(;F
z(LP>QI4jO_AFmkTRfPqyK)`DTcwOO~I7h%626$89yg1LfXB+t}PooycTRz?<<##lV
zj#g7x6pQ5dt^wXtSQ1MFEE~X4SP?4(Trj{zg-hZRDNh;YRur>5N+d4(c%QUCFvKeg
zSH)HG{Lla&DO?lR2>93lpD0`x*9rL405=qFikqZ8W3*XFPccyc%*Sdi%#$<}j+8n>
zC}%R~kYU$3>+Egu`nHWVQT`5t>?KKDUhU^TzMvr2YtMcBMj{)$O&gl`zvWi?_)=j*
zY*3hA)kF!n3UyH@U{eED9}R`u;x++I!}wZZOKj0Wjvri7n)H$)#~mMcpTG55oMgPU
zEu)+=kEgpVwy}MS+{@9&Hwxd1qkyOI-4Jk3;rk)r2L+#(nX8j@hvlJ=N!rDaK7!g~
z8r+*C^&;azmt~y{Kkb+KLd6!uY>%fI*dPOU6vC3LO$Ei6p(Q3aElMG-aKAjJHnq$#
z+Z4x*e-A<z3Q<|GnnGp9@!;8qB%58ob9d85r<9AML%DcJuB#xulO(RojxJRfb<B2j
zNhj`~WkE$<*5*ORQE(aZ6g_WT^ApLJFJ1wP!fCqW6Tkj-YW_FsDf-|1-*z@_RZTmi
z`p+oQ*H<gl%`V$o;-5d-+ZB@5|E4>lKTmq%QF)!v!(+HZKJ9x{SGS(S@izrK@HOEy
v-D-8o*cV2ZpD@c<Xm#*?Xe@JMg9-WROR@INdW7apcT6{A@~K#wKj8icAIzI^

literal 0
HcmV?d00001

diff --git a/pylaia/fraktur/syms.txt b/pylaia/fraktur/syms.txt
new file mode 100644
index 0000000..3b82158
--- /dev/null
+++ b/pylaia/fraktur/syms.txt
@@ -0,0 +1,160 @@
+<ctc> 0
+! 1
+" 2
+# 3
+% 4
+& 5
+' 6
+( 7
+) 8
+* 9
++ 10
+, 11
+- 12
+. 13
+/ 14
+0 15
+1 16
+2 17
+3 18
+4 19
+5 20
+6 21
+7 22
+8 23
+9 24
+: 25
+; 26
+= 27
+? 28
+A 29
+B 30
+C 31
+D 32
+E 33
+F 34
+G 35
+H 36
+I 37
+J 38
+K 39
+L 40
+M 41
+N 42
+O 43
+P 44
+Q 45
+R 46
+S 47
+T 48
+U 49
+V 50
+W 51
+X 52
+Y 53
+Z 54
+[ 55
+] 56
+_ 57
+a 58
+b 59
+c 60
+d 61
+e 62
+f 63
+g 64
+h 65
+i 66
+j 67
+k 68
+l 69
+m 70
+n 71
+o 72
+p 73
+q 74
+r 75
+s 76
+t 77
+u 78
+v 79
+w 80
+x 81
+y 82
+z 83
+| 84
+~ 85
+§ 86
+« 87
+¬ 88
+° 89
+± 90
+´ 91
+· 92
+» 93
+¼ 94
+½ 95
+¾ 96
+Ä 97
+È 98
+É 99
+Ê 100
+Ó 101
+Ô 102
+Ö 103
+Ü 104
+ß 105
+à 106
+á 107
+â 108
+ã 109
+ä 110
+æ 111
+ç 112
+è 113
+é 114
+ê 115
+ë 116
+ï 117
+ñ 118
+ò 119
+ó 120
+ô 121
+ö 122
+û 123
+ü 124
+ř 125
+Š 126
+Ž 127
+ž 128
+˙ 129
+· 130
+Δ 131
+Ο 132
+– 133
+— 134
+‘ 135
+’ 136
+‚ 137
+“ 138
+” 139
+„ 140
+† 141
+• 142
+… 143
+⅓ 144
+⅕ 145
+⅙ 146
+⅛ 147
+⅜ 148
+⅝ 149
+⅞ 150
+∆ 151
+≅ 152
+═ 153
+▄ 154
+□ 155
+○ 156
+● 157
+<unk> 158
+<space> 159
\ No newline at end of file
diff --git a/pylaia/fraktur/tokens.txt b/pylaia/fraktur/tokens.txt
new file mode 100644
index 0000000..3a3678b
--- /dev/null
+++ b/pylaia/fraktur/tokens.txt
@@ -0,0 +1,160 @@
+<ctc>
+!
+"
+#
+%
+&
+'
+(
+)
+*
++
+,
+-
+.
+/
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+:
+;
+=
+?
+A
+B
+C
+D
+E
+F
+G
+H
+I
+J
+K
+L
+M
+N
+O
+P
+Q
+R
+S
+T
+U
+V
+W
+X
+Y
+Z
+[
+]
+_
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+|
+~
+§
+«
+¬
+°
+±
+´
+·
+»
+¼
+½
+¾
+Ä
+È
+É
+Ê
+Ó
+Ô
+Ö
+Ü
+ß
+à
+á
+â
+ã
+ä
+æ
+ç
+è
+é
+ê
+ë
+ï
+ñ
+ò
+ó
+ô
+ö
+û
+ü
+ř
+Š
+Ž
+ž
+˙
+·
+Δ
+Ο
+–
+—
+‘
+’
+‚
+“
+”
+„
+†
+•
+…
+⅓
+⅕
+⅙
+⅛
+⅜
+⅝
+⅞
+∆
+≅
+═
+▄
+□
+○
+●
+<unk>
+<space>
\ No newline at end of file
diff --git a/pylaia/fraktur/weights.ckpt b/pylaia/fraktur/weights.ckpt
new file mode 100644
index 0000000..409bb4c
--- /dev/null
+++ b/pylaia/fraktur/weights.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:263563dfd71bce1b6e42156a8f9b70dc09795f585a28c4d098d17952f4b76225
+size 42996316
-- 
GitLab