diff --git a/arkindex_worker/cache.py b/arkindex_worker/cache.py
index 233c7d93e7a12623219b0d2205671b912cfa77f1..d887bd78bf09f7c2a9910199bb99e68f0a2dc2fc 100644
--- a/arkindex_worker/cache.py
+++ b/arkindex_worker/cache.py
@@ -13,6 +13,7 @@ from pathlib import Path
 from typing import Optional, Union
 
 from peewee import (
+    SQL,
     BooleanField,
     CharField,
     Check,
@@ -246,6 +247,28 @@ class CachedTranscriptionEntity(Model):
         table_name = "transcription_entities"
 
 
+class CachedDataset(Model):  # Peewee model backing the "datasets" cache table
+    id = UUIDField(primary_key=True)
+    name = CharField()
+    state = CharField(constraints=[SQL("DEFAULT 'open'")])  # DB-side default
+    sets = TextField()  # NOTE(review): presumably serialized set names - confirm
+
+    class Meta:
+        database = db
+        table_name = "datasets"
+
+
+class CachedDatasetElement(Model):  # Links a cached element to a cached dataset
+    id = UUIDField(primary_key=True)
+    element = ForeignKeyField(column_name="element_id", field="id", model=CachedElement)
+    dataset = ForeignKeyField(column_name="dataset_id", field="id", model=CachedDataset)
+    set_name = CharField()  # NOTE(review): presumably one of the dataset's sets
+
+    class Meta:
+        database = db
+        table_name = "dataset_elements"
+
+
 # Add all the managed models in that list
 # It's used here, but also in unit tests
 MODELS = [
@@ -255,8 +278,10 @@ MODELS = [
     CachedClassification,
     CachedEntity,
     CachedTranscriptionEntity,
+    CachedDataset,
+    CachedDatasetElement,
 ]
-SQL_VERSION = 2
+SQL_VERSION = 3
 
 
 def init_cache_db(path: Path):
@@ -346,6 +371,10 @@ def merge_parents_cache(paths: list, current_database: Path):
             f"REPLACE INTO elements SELECT * FROM source_{idx}.elements;",
             f"REPLACE INTO transcriptions SELECT * FROM source_{idx}.transcriptions;",
             f"REPLACE INTO classifications SELECT * FROM source_{idx}.classifications;",
+            f"REPLACE INTO entities SELECT * FROM source_{idx}.entities;",
+            f"REPLACE INTO transcription_entities SELECT * FROM source_{idx}.transcription_entities;",
+            f"REPLACE INTO datasets SELECT * FROM source_{idx}.datasets;",
+            f"REPLACE INTO dataset_elements SELECT * FROM source_{idx}.dataset_elements;",
         ]
 
         for statement in statements:
diff --git a/tests/test_cache.py b/tests/test_cache.py
index c28b8f4f524400f159fd0c53b869ba2750983a77..adcef102fcb4144d1c1a5ae1f3971d71239f6d45 100644
--- a/tests/test_cache.py
+++ b/tests/test_cache.py
@@ -57,6 +57,8 @@ def test_create_tables(tmp_path):
     create_tables()
 
     expected_schema = """CREATE TABLE "classifications" ("id" TEXT NOT NULL PRIMARY KEY, "element_id" TEXT NOT NULL, "class_name" TEXT NOT NULL, "confidence" REAL NOT NULL, "state" VARCHAR(10) NOT NULL, "worker_run_id" TEXT, FOREIGN KEY ("element_id") REFERENCES "elements" ("id"))
+CREATE TABLE "dataset_elements" ("id" TEXT NOT NULL PRIMARY KEY, "element_id" TEXT NOT NULL, "dataset_id" TEXT NOT NULL, "set_name" VARCHAR(255) NOT NULL, FOREIGN KEY ("element_id") REFERENCES "elements" ("id"), FOREIGN KEY ("dataset_id") REFERENCES "datasets" ("id"))
+CREATE TABLE "datasets" ("id" TEXT NOT NULL PRIMARY KEY, "name" VARCHAR(255) NOT NULL, "state" VARCHAR(255) NOT NULL DEFAULT 'open', "sets" TEXT NOT NULL)
 CREATE TABLE "elements" ("id" TEXT NOT NULL PRIMARY KEY, "parent_id" TEXT, "type" VARCHAR(50) NOT NULL, "image_id" TEXT, "polygon" text, "rotation_angle" INTEGER NOT NULL, "mirrored" INTEGER NOT NULL, "initial" INTEGER NOT NULL, "worker_version_id" TEXT, "worker_run_id" TEXT, "confidence" REAL, FOREIGN KEY ("image_id") REFERENCES "images" ("id"))
 CREATE TABLE "entities" ("id" TEXT NOT NULL PRIMARY KEY, "type" VARCHAR(50) NOT NULL, "name" TEXT NOT NULL, "validated" INTEGER NOT NULL, "metas" text, "worker_run_id" TEXT)
 CREATE TABLE "images" ("id" TEXT NOT NULL PRIMARY KEY, "width" INTEGER NOT NULL, "height" INTEGER NOT NULL, "url" TEXT NOT NULL)