diff --git a/document_processing/transcription.py b/document_processing/transcription.py
new file mode 100644
index 0000000000000000000000000000000000000000..49bd8e2ecf2fef4a53f1f98fdb57cffff87eea47
--- /dev/null
+++ b/document_processing/transcription.py
@@ -0,0 +1,108 @@
+# -*- coding: utf-8 -*-
+
+import itertools
+from typing import List
+
+from document_processing.utils import BoundingBox, Point, TextOrientation, bbox_to_polygon, bounding_rect
+
+
+class Transcription:
+    """A transcribed text line: its polygon, text and confidence for one element."""
+
+    def __init__(
+        self,
+        element_id,
+        polygon,
+        text,
+        confidence,
+        orientation=TextOrientation.HorizontalLeftToRight,
+        rotation_class=None,
+        rotation_class_confidence=None,
+    ):
+        self.element_id = element_id
+        self.polygon = polygon
+        self.text = text
+        self.confidence = confidence
+        # Enum lookup: a member is returned as is, a raw value ("horizontal-lr") is converted
+        self.orientation = TextOrientation(orientation)
+        self.rotation_class = rotation_class
+        self.rotation_class_confidence = rotation_class_confidence
+
+    @property
+    def rect(self):
+        """Bounding rectangle (x, y, width, height) of the current polygon."""
+        return BoundingBox._make(bounding_rect(self.polygon))
+
+    def __repr__(self):
+        return str(vars(self))
+
+    def merge(self, other):
+        """
+        Extend inner data (polygon, text, confidence) with another line's data.
+
+        The polygon becomes the rectangle that contains both lines' polygons.
+        Text is concatenated with a space, starting from the leftmost line.
+        Confidences are averaged when both are set; else the other's is used if set.
+        """
+        assert isinstance(other, Transcription)
+
+        if other.rect.x < self.rect.x:
+            self.text = other.text + " " + self.text
+        else:
+            self.text += " " + other.text
+
+        # Compare with None: a confidence of 0.0 is a real score, not a missing one
+        if self.confidence is not None and other.confidence is not None:
+            self.confidence = (self.confidence + other.confidence) / 2
+        elif other.confidence is not None:
+            self.confidence = other.confidence
+
+        self.polygon = get_global_polygon(self.polygon, other.polygon)
+
+    @property
+    def center(self) -> Point:
+        """Center (x + width / 2, y + height / 2) of the bounding rectangle, rounded."""
+        x, y, width, height = self.rect
+        return Point(round(x + width / 2), round(y + height / 2))
+
+
+def get_global_polygon(a: List[List[int]], b: List[List[int]]) -> List[List[int]]:
+    """
+    Build the rectangle polygon that encloses two polygons.
+
+    :param a: first polygon, as a sequence of (x, y) points
+    :param b: second polygon, same format
+    :returns: bounding rectangle of all points, converted by bbox_to_polygon
+    """
+    # Polygons are point lists, not BoundingBox tuples: split all points per axis
+    xs, ys = zip(*a, *b)
+    minx, miny = min(xs), min(ys)
+    return bbox_to_polygon(
+        BoundingBox(
+            x=minx,
+            y=miny,
+            width=max(xs) - minx,
+            height=max(ys) - miny,
+        )
+    )
+
+
+def merge_close_lines(lines: List[Transcription], threshold=0.5):
+    """
+    Merge lines whose centers are close to each other on the vertical axis.
+
+    For each pair in input order, the earlier line absorbs the later one (it is
+    mutated in place) when |center dy| / earlier height <= threshold (None = 0.5).
+    Zero-height lines absorb nothing. Returns the surviving lines, order kept.
+    """
+    if threshold is None:
+        threshold = 0.5
+    removed = set()  # ids of absorbed lines (same identity test as a list `in`)
+    for a, b in itertools.combinations(lines, r=2):
+        if id(a) in removed or id(b) in removed or a.rect.height == 0:
+            continue
+        # Vertical distance between centers, relative to the absorbing line's height
+        if abs((b.center.y - a.center.y) / a.rect.height) <= threshold:
+            a.merge(b)
+            removed.add(id(b))
+    return [line for line in lines if id(line) not in removed]
diff --git a/document_processing/utils.py b/document_processing/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..c2d44d25efc57aa2c40830948233975250d04e45
--- /dev/null
+++ b/document_processing/utils.py
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+from enum import Enum
+from typing import List, NamedTuple
+
+BoundingBox = NamedTuple("BoundingBox", x=int, y=int, width=int, height=int)
+Point = NamedTuple("Point", x=int, y=int)
+
+
+class TextOrientation(Enum):
+    """
+    Supported reading directions for a transcription's text.
+
+    Each member wraps the string identifier of one direction, so a member
+    can be retrieved from that string: ``TextOrientation("vertical-rl")``.
+    A string that matches no member raises ValueError.
+    The string literal placed under each member documents that member.
+
+    Copied from https://gitlab.com/teklia/workers/base-worker/-/blob/master/arkindex_worker/worker/transcription.py
+    """
+
+    HorizontalLeftToRight = "horizontal-lr"
+    """
+    Text is read from top to bottom, then left to right.
+    Used as the default when no orientation is given.
+    """
+
+    HorizontalRightToLeft = "horizontal-rl"
+    """Text is read from top to bottom, then right to left."""
+
+    VerticalRightToLeft = "vertical-rl"
+    """Text is read from right to left, then top to bottom."""
+
+    VerticalLeftToRight = "vertical-lr"
+    """Text is read from left to right, then top to bottom."""
+
+
+def bounding_rect(polygon: list) -> BoundingBox:
+    """Return the axis-aligned rectangle that encloses a polygon.
+
+    :param polygon: sequence of points, read as point[0] = x and point[1] = y
+    :returns: BoundingBox holding x, y of the top left corner, then width and height
+    """
+    # Every coordinate goes through int() before the extremes are searched
+    xs = [int(pt[0]) for pt in polygon]
+    ys = [int(pt[1]) for pt in polygon]
+    left, top = min(xs), min(ys)
+    right, bottom = max(xs), max(ys)
+    return BoundingBox(left, top, right - left, bottom - top)
+
+
+def bbox_to_polygon(bbox: BoundingBox) -> List[List[int]]:
+    """Turn a bounding box into a closed rectangle polygon.
+
+    :returns: the four corners starting at (x, y), then (x, y) again to close the ring
+    """
+    left, top, width, height = bbox
+    right, bottom = left + width, top + height
+    corners = [(left, top), (right, top), (right, bottom), (left, bottom), (left, top)]
+    return [list(corner) for corner in corners]
diff --git a/tests/test_dummy.py b/tests/test_dummy.py
deleted file mode 100644
index f4f53619168f8993841e5a85193b424a60085554..0000000000000000000000000000000000000000
--- a/tests/test_dummy.py
+++ /dev/null
@@ -1,2 +0,0 @@
-def test_dummy():
-    assert True
diff --git a/tests/test_transcription.py b/tests/test_transcription.py
new file mode 100644
index 0000000000000000000000000000000000000000..258b0b98ff16d4beb250ab1ce498d2baab686ff4
--- /dev/null
+++ b/tests/test_transcription.py
@@ -0,0 +1,136 @@
+# -*- coding: utf-8 -*-
+from document_processing.transcription import Transcription, merge_close_lines
+
+
+def test_simple_merge():
+    """
+    Two lines sitting on the same row are merged, left text first,
+    while a line located much lower is returned unchanged.
+    """
+
+    # Short line in the upper left corner
+    hello = Transcription(
+        element_id="Fake_element_1",
+        polygon=[
+            [0, 0],
+            [60, 0],
+            [60, 20],
+            [0, 20],
+        ],
+        text="Hello",
+        confidence=1.0,
+    )
+
+    # Wider line to the right of the first one, shifted down by 10
+    world = Transcription(
+        element_id="Fake_element_2",
+        polygon=[
+            [200, 10],
+            [580, 10],
+            [580, 30],
+            [200, 30],
+        ],
+        text="World !",
+        confidence=0.5,
+    )
+
+    # Tall line placed far below the two others
+    far_below = Transcription(
+        element_id="Fake_element_3",
+        polygon=[
+            [100, 100],
+            [100, 200],
+            [400, 200],
+            [400, 100],
+        ],
+        text="Another line far away",
+        confidence=1.0,
+    )
+    top, bottom = merge_close_lines([hello, world, far_below])
+
+    # Both upper lines now share one closed rectangle that covers them
+    merged_rectangle = [
+        [0, 0],
+        [580, 0],
+        [580, 30],
+        [0, 30],
+        [0, 0],
+    ]
+    assert top.polygon == merged_rectangle
+    assert (top.text, top.confidence) == ("Hello World !", 0.75)
+
+    # The lower line keeps its own polygon, text and confidence
+    untouched_polygon = [
+        [100, 100],
+        [100, 200],
+        [400, 200],
+        [400, 100],
+    ]
+    assert bottom.polygon == untouched_polygon
+    assert (bottom.text, bottom.confidence) == ("Another line far away", 1.0)
+
+
+def test_reversed_merge():
+    """
+    Same layout as the simple case but given in reverse order: the merged
+    text must still start with the leftmost line.
+    """
+    # Line on the right, listed before its left neighbour
+    world = Transcription(
+        element_id="Fake_element_2",
+        polygon=[
+            [200, 10],
+            [580, 10],
+            [580, 30],
+            [200, 30],
+        ],
+        text="World !",
+        confidence=0.5,
+    )
+    # Line on the left, listed last
+    hello = Transcription(
+        element_id="Fake_element_1",
+        polygon=[
+            [0, 0],
+            [60, 0],
+            [60, 20],
+            [0, 20],
+        ],
+        text="Hello",
+        confidence=1.0,
+    )
+    # Tall line placed far below the two others, listed first
+    far_below = Transcription(
+        element_id="Fake_element_3",
+        polygon=[
+            [100, 100],
+            [100, 200],
+            [400, 200],
+            [400, 100],
+        ],
+        text="Another line far away",
+        confidence=1.0,
+    )
+
+    bottom, top = merge_close_lines([far_below, world, hello])
+
+    # Both upper lines now share one closed rectangle that covers them
+    merged_rectangle = [
+        [0, 0],
+        [580, 0],
+        [580, 30],
+        [0, 30],
+        [0, 0],
+    ]
+    assert top.polygon == merged_rectangle
+    assert (top.text, top.confidence) == ("Hello World !", 0.75)
+
+    # The lower line keeps its own polygon, text and confidence
+    untouched_polygon = [
+        [100, 100],
+        [100, 200],
+        [400, 200],
+        [400, 100],
+    ]
+    assert bottom.polygon == untouched_polygon
+    assert (bottom.text, bottom.confidence) == ("Another line far away", 1.0)