diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000000000000000000000000000000000000..f2210b91baee729c61e5300dbe4b65416ffd9526
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,7 @@
+[flake8]
+max-line-length = 120
+exclude=.cache,.eggs,.git
+# Setting `ignore` replaces Flake8's default ignore list;
+# of those defaults, the only one worth keeping is W503, which goes against PEP 8.
+# See https://lintlyci.github.io/Flake8Rules/rules/W503.html
+ignore = E203,E501,W503
diff --git a/document_processing/text_flip.py b/document_processing/text_flip.py
new file mode 100644
index 0000000000000000000000000000000000000000..82a9f0c807c8dbd10914b3cfe4c899b97f5c9419
--- /dev/null
+++ b/document_processing/text_flip.py
@@ -0,0 +1,50 @@
+import codecs
+import re
+
+DATE_SEPARATORS = [{"code": "\u002F", "name": "SOLIDUS"}, {"code": "\u060D", "name": "ARABIC DATE SEPARATOR"}]
+
+
+def is_date_separator(char):  # "code" values are already real characters: decoding them again mangled U+060D
+    return any(char == symbol["code"] for symbol in DATE_SEPARATORS)
+
+
+def is_date(word):
+    """Return True when `word` holds only digits and date separators (loose on purpose: predictions may be off)."""
+    return all(symbol.isdigit() or is_date_separator(symbol) for symbol in word)
+
+
+def is_integer(word):  # True when every character is a digit; vacuously True for the empty string
+    return all(symbol.isdigit() for symbol in word)
+
+
+def flip_single_word(word):
+    """Reverse the digits of one word.
+
+    A date or an integer (only digits and date separators) is reversed as a whole; in any other word
+    (floats, numbers in parenthesis, mix of letters and numbers) only each run of ASCII digits is reversed.
+    Whitespace at either end of `word` (e.g. a trailing newline) is left out of that decision and
+    handed back unchanged.
+    Returns the flipped word as a new string.
+    """
+    core = word.strip()
+    start = word.find(core)  # number of leading whitespace characters (0 when `core` is empty)
+    head, tail = word[:start], word[start + len(core) :]
+    if is_date(core) or is_integer(core):  # should we include float ?
+        return head + core[::-1] + tail
+
+    # Split around the digit runs; the capturing group keeps the runs themselves in the result, so no
+    # character is lost (the former space-padding + strip() tokenization dropped whitespace such as "\n").
+    pieces = re.split(r"([0-9]+)", core)
+
+    # flip only numbers; the empty pieces re.split may emit at the edges stay empty
+    return head + "".join(piece[::-1] if is_integer(piece) else piece for piece in pieces) + tail
+
+
+def should_flip(word):  # a word is a flipping candidate as soon as one of its characters is a digit
+    return any(symbol.isdigit() for symbol in word)
+
+
+def flip_numbers_in_transcription(text):
+    """Split `text` on single spaces, run every word holding a digit through flip_single_word, and re-join."""
+    flipped = (flip_single_word(chunk) if should_flip(chunk) else chunk for chunk in text.split(" "))
+    return " ".join(flipped)
diff --git a/tests/test_text_flip.py b/tests/test_text_flip.py
new file mode 100644
index 0000000000000000000000000000000000000000..9c447e45831a6c78be1ae306575c40714e10b2e7
--- /dev/null
+++ b/tests/test_text_flip.py
@@ -0,0 +1,7 @@
+from document_processing.text_flip import flip_numbers_in_transcription
+
+
+def test_arabic():  # end-to-end check on a transcription mixing words, integers, decimals and a date
+    transcription = "(100) Ø¹Ø¯  293,1 21212121.2 Ù„ 1/Ø¶Ø§ÙŠØ± ØØ¶Ø± Ø¨Ø±Ù‚Ù… 4343 Ùˆ ØªØ§Ø±ÙŠØ® 2041/01/11 Ù‡ Ø¨Ø´Ø£Ù†"
+    expected = "(001) Ø¹Ø¯  392,1 12121212.2 Ù„ 1/Ø¶Ø§ÙŠØ± ØØ¶Ø± Ø¨Ø±Ù‚Ù… 3434 Ùˆ ØªØ§Ø±ÙŠØ® 11/10/1402 Ù‡ Ø¨Ø´Ø£Ù†"
+    assert expected == flip_numbers_in_transcription(transcription)