Skip to content
Snippets Groups Projects
Commit 8c7153bc authored by Bastien Abadie
Browse files

Merge branch 'dedupe-classifications' into 'master'

Deduplicate classifications on datasource migration

Closes #596

See merge request !1156
parents 6c6e8f53 e59f9ee1
No related branches found
No related tags found
1 merge request: !1156 "Deduplicate classifications on datasource migration"
......@@ -108,6 +108,30 @@ class Migration(migrations.Migration):
reverse_code=migrations.RunPython.noop,
elidable=True,
),
# Duplicate classifications may still remain: several rows can share the same
# (element_id, ml_class_id) pair. For each such pair, keep only the row with
# the highest confidence and delete the others.
migrations.RunSQL(
    # The window MUST be partitioned by both element_id and ml_class_id, i.e. the
    # same key the duplicates are grouped by. Partitioning by element_id alone
    # would rank the duplicates of every class of an element together and delete
    # all but one row overall, wiping out the other classes entirely.
    # PostgreSQL sorts NULLs first under DESC, so NULLS LAST prefers a row that
    # has a confidence; the id tie-breaker makes the surviving row deterministic
    # when confidences are equal.
    """
    DELETE FROM documents_classification WHERE id IN (
        SELECT id
        FROM (
            SELECT c.id, ROW_NUMBER() OVER (
                PARTITION BY x.element_id, x.ml_class_id
                ORDER BY c.confidence DESC NULLS LAST, c.id
            ) AS nb
            FROM (
                SELECT element_id, ml_class_id
                FROM documents_classification
                GROUP BY element_id, ml_class_id
                HAVING COUNT(id) > 1
            ) AS x
            INNER JOIN documents_classification AS c USING (element_id, ml_class_id)
        ) AS y
        WHERE y.nb > 1
    );
    """,
    # Deleted duplicates cannot be restored, so reversing does nothing.
    reverse_sql=migrations.RunSQL.noop,
    elidable=True,
),
# Deletion happens in another migration, since updating data then trying to update
# the structure causes errors with 'pending trigger events'
]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment