diff --git a/dan/datasets/extract/__init__.py b/dan/datasets/extract/__init__.py
index 8a4def011e469acbbbfb42543042499f051e2562..80fc3de1daba7f2441f4ee4056885c71316c64a5 100644
--- a/dan/datasets/extract/__init__.py
+++ b/dan/datasets/extract/__init__.py
@@ -161,4 +161,10 @@ def add_extract_parser(subcommands) -> None:
         help="Do not remove beginning, ending and consecutive spaces in transcriptions.",
     )
 
+    parser.add_argument(
+        "--allow-empty",
+        action="store_true",
+        help="Also extract data from element with no transcription.",
+    )
+
     parser.set_defaults(func=run)
diff --git a/dan/datasets/extract/extract.py b/dan/datasets/extract/extract.py
index e6847e0c4b3a34996846a7fcd2e1bba0d94edb9f..0251e6545f7068e294886e8b2f38cf39b33e7a99 100644
--- a/dan/datasets/extract/extract.py
+++ b/dan/datasets/extract/extract.py
@@ -74,6 +74,7 @@ class ArkindexExtractor:
         max_height: Optional[int] = None,
         keep_spaces: bool = False,
         image_extension: str = "",
+        allow_empty: bool = False,
     ) -> None:
         self.folders = folders
         self.element_type = element_type
@@ -87,7 +88,7 @@ class ArkindexExtractor:
         self.max_width = max_width
         self.max_height = max_height
         self.image_extension = image_extension
-
+        self.allow_empty = allow_empty
         self.keep_spaces = keep_spaces
 
         self.data: Dict = defaultdict(dict)
@@ -196,6 +197,8 @@ class ArkindexExtractor:
             element.id, self.transcription_worker_version
         )
         if len(transcriptions) == 0:
+            if self.allow_empty:
+                return ""
             raise NoTranscriptionError(element.id)
 
         transcription = random.choice(transcriptions)
@@ -425,6 +428,7 @@ def run(
     max_height: Optional[int],
     image_format: str,
     keep_spaces: bool,
+    allow_empty: bool,
 ):
     assert database.exists(), f"No file found @ {database}"
     open_database(path=database)
@@ -449,4 +453,5 @@ def run(
         max_height=max_height,
         keep_spaces=keep_spaces,
         image_extension=image_format,
+        allow_empty=allow_empty,
     ).run()
diff --git a/docs/usage/datasets/extract.md b/docs/usage/datasets/extract.md
index a7715d59e07d1e776b05ff72a61b8d034e88dc4f..5172933059709b9eeab189893fecd1b52fc5a9dd 100644
--- a/docs/usage/datasets/extract.md
+++ b/docs/usage/datasets/extract.md
@@ -28,6 +28,7 @@ If an image download fails for whatever reason, it won't appear in the transcrip
 | `--max-height`                   | Images larger than this height will be resized to this height.                                                                                                                                                                       | `int`           |                                                    |
 | `--keep-spaces`                  | Transcriptions are trimmed by default. Use this flag to disable this behaviour.                                                                                                                                                      | `bool`          | False                                              |
 | `--image-format`                 | Images will be saved under this format.                                                                                                                                                                                              | `str`           | `.jpg`                                             |
+| `--allow-empty`                  | Elements with no transcriptions are skipped by default. This flag disables this behaviour.                                                                                                                                           | `bool`          | False                                              |
 
 The `--tokens` argument expects a YAML-formatted file with a specific format. A list of entries with each entry describing a NER entity. The label of the entity is the key to a dict mapping the starting and ending tokens respectively.
 
diff --git a/tests/test_extract.py b/tests/test_extract.py
index 529810005df89fc9390fb2e84045327e99d90b90..d23c2c733e9ad9673b8e081a873601cbaee060be 100644
--- a/tests/test_extract.py
+++ b/tests/test_extract.py
@@ -12,7 +12,11 @@ import pytest
 from PIL import Image, ImageChops
 
 from arkindex_export import Element, Transcription
-from dan.datasets.extract.exceptions import NoEndTokenError, UnknownTokenInText
+from dan.datasets.extract.exceptions import (
+    NoEndTokenError,
+    NoTranscriptionError,
+    UnknownTokenInText,
+)
 from dan.datasets.extract.extract import IIIF_FULL_SIZE, ArkindexExtractor
 from dan.datasets.extract.utils import EntityType, insert_token, remove_spaces
 from dan.utils import parse_tokens
@@ -487,3 +491,26 @@ def test_download_image_error(iiif_url, caplog, capsys):
     # Check stdout
     captured = capsys.readouterr()
     assert captured.out == "deadbeef: Image URL must be HTTP(S)\n"
+
+
+@pytest.mark.parametrize("allow_empty", (True, False))  # flag on, then off
+def test_empty_transcription(allow_empty, mock_database):
+    extractor = ArkindexExtractor(
+        folders=["train", "val", "test"],
+        element_type=["text_line"],
+        parent_element_type="double_page",
+        output=None,
+        entity_separators=None,
+        tokens=None,
+        transcription_worker_version=None,
+        entity_worker_version=None,
+        keep_spaces=False,
+        image_extension=".jpg",
+        allow_empty=allow_empty,
+    )
+    element_no_transcription = Element(id="unknown")  # assumed untranscribed; verify
+    if allow_empty:  # flag set: a missing transcription yields an empty string
+        assert extractor.extract_transcription(element_no_transcription) == ""
+    else:  # flag unset: a missing transcription raises NoTranscriptionError
+        with pytest.raises(NoTranscriptionError):
+            extractor.extract_transcription(element_no_transcription)