diff --git a/arkindex/documents/export/dataset.sql b/arkindex/documents/export/dataset.sql index fd7990752bddaf22b3103dea95054f08f5e8f3e2..48d54910f083f8c3c81ea74b13b1387f23595bb9 100644 --- a/arkindex/documents/export/dataset.sql +++ b/arkindex/documents/export/dataset.sql @@ -2,6 +2,8 @@ SELECT dataset.id, dataset.name, dataset.state, - ARRAY_TO_STRING(dataset.sets, ',', '') + string_agg(datasetset.name, ',') FROM training_dataset dataset +INNER JOIN training_datasetset datasetset ON datasetset.dataset_id = dataset.id WHERE dataset.corpus_id = '{corpus_id}'::uuid +GROUP BY dataset.id diff --git a/arkindex/documents/export/dataset_element.sql b/arkindex/documents/export/dataset_element.sql index 4084e2e0cde82b451187c36ac6db2304d4576486..c75624c81d1c5129b2864e453cc6d61be562f480 100644 --- a/arkindex/documents/export/dataset_element.sql +++ b/arkindex/documents/export/dataset_element.sql @@ -1,8 +1,9 @@ SELECT dataset_element.id, dataset_element.element_id, - dataset_element.dataset_id, - dataset_element.set + dataset_set.dataset_id, + dataset_set.name FROM training_datasetelement dataset_element -INNER JOIN training_dataset dataset ON (dataset_element.dataset_id = dataset.id) +INNER JOIN training_datasetset dataset_set ON (dataset_element.set_id = dataset_set.id) +INNER JOIN training_dataset dataset ON (dataset_set.dataset_id = dataset.id) WHERE dataset.corpus_id = '{corpus_id}'::uuid diff --git a/arkindex/documents/fixtures/data.json b/arkindex/documents/fixtures/data.json index 15e6fbb2ce2a07342cfb794b593007597bbdcdf7..a8f8c9cd9137262b6acfc5564119fdd95b4b7f04 100644 --- a/arkindex/documents/fixtures/data.json +++ b/arkindex/documents/fixtures/data.json @@ -1,18 +1,18 @@ [ { "model": "process.process", - "pk": "76506eee-43ab-4caa-966c-9e8e5d10ef93", + "pk": "6fb4ded9-ab97-4764-b0b0-4a7040556cde", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "name": null, - "creator": 1, - "corpus": null, - "mode": "local", + "name": 
"Process fixture", + "creator": 2, + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "mode": "workers", "activity_state": "disabled", "started": null, "finished": null, - "farm": null, + "farm": "0464874a-12cd-4bb5-a06f-37a872a82e5a", "element": null, "folder_type": null, "element_type": null, @@ -31,18 +31,18 @@ }, { "model": "process.process", - "pk": "7a6fed8c-ed9c-4714-8036-7048462ce0f2", + "pk": "89fbd695-bd87-4a1d-afef-8647a30d3ccc", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "name": "Process fixture", - "creator": 2, - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "mode": "workers", + "name": null, + "creator": 1, + "corpus": null, + "mode": "local", "activity_state": "disabled", "started": null, "finished": null, - "farm": "395522d1-94a0-407a-b5cb-347fa68ec2c4", + "farm": null, "element": null, "folder_type": null, "element_type": null, @@ -61,7 +61,7 @@ }, { "model": "process.process", - "pk": "b6b7dbcb-e134-4274-93fd-9d6d06818c6f", + "pk": "fb8b6046-9aa6-454d-8e0b-472f93605016", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", @@ -91,25 +91,25 @@ }, { "model": "process.repository", - "pk": "bcfed350-d5be-4f27-b66a-042be3e6ee64", + "pk": "a7c6b0d5-d68c-48f7-a07b-09e0522a4d5c", "fields": { - "url": "http://gitlab/repo" + "url": "http://my_repo.fake/workers/worker" } }, { "model": "process.repository", - "pk": "e3f470b1-fc46-4bc1-8117-30e1c62962b7", + "pk": "e3e6edfa-da1c-46a1-85be-24c0c5308fdd", "fields": { - "url": "http://my_repo.fake/workers/worker" + "url": "http://gitlab/repo" } }, { "model": "process.revision", - "pk": "2d087aa1-82ba-4ec5-af14-c19e4213f913", + "pk": "179c0b17-cfa7-4157-bba0-83c83d58ee62", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "repo": "e3f470b1-fc46-4bc1-8117-30e1c62962b7", + "repo": "a7c6b0d5-d68c-48f7-a07b-09e0522a4d5c", "hash": "1337", "message": "My w0rk3r", "author": "Test user" 
@@ -117,11 +117,11 @@ }, { "model": "process.revision", - "pk": "7876a435-f3a5-40f8-b30b-f388f22019bf", + "pk": "e67d4944-5167-4f45-bfac-2a4d92bbd177", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "repo": "bcfed350-d5be-4f27-b66a-042be3e6ee64", + "repo": "e3e6edfa-da1c-46a1-85be-24c0c5308fdd", "hash": "42", "message": "Salve", "author": "Some user" @@ -129,50 +129,50 @@ }, { "model": "process.worker", - "pk": "12b2344f-4871-40ad-aa1b-efaa70b5823f", + "pk": "7290f39e-fa17-4e7f-a051-a0e573c3ff6b", "fields": { - "name": "Document layout analyser", - "slug": "dla", - "type": "e84d4893-ae73-4162-acb6-13d606587644", + "name": "Recognizer", + "slug": "reco", + "type": "ab7333fc-34e8-4fb6-a3bb-d733443cbe51", "description": "", - "repository": "e3f470b1-fc46-4bc1-8117-30e1c62962b7", + "repository": "a7c6b0d5-d68c-48f7-a07b-09e0522a4d5c", "public": false, "archived": null } }, { "model": "process.worker", - "pk": "1a18dc1d-49e3-4724-b1e3-cbc8da483610", + "pk": "7c61dba0-4fdc-494f-b587-abddc75b8dbb", "fields": { - "name": "Worker requiring a GPU", - "slug": "worker-gpu", - "type": "20d3065e-0ef7-428b-888f-177f59e6ddd0", + "name": "Generic worker with a Model", + "slug": "generic", + "type": "ab7333fc-34e8-4fb6-a3bb-d733443cbe51", "description": "", - "repository": "e3f470b1-fc46-4bc1-8117-30e1c62962b7", + "repository": "a7c6b0d5-d68c-48f7-a07b-09e0522a4d5c", "public": false, "archived": null } }, { "model": "process.worker", - "pk": "219444d1-2a84-49f4-94d6-d5c21e36fcb9", + "pk": "8a27660c-f976-4550-834c-84d4ad033214", "fields": { - "name": "Generic worker with a Model", - "slug": "generic", - "type": "50729501-d9c4-473d-803f-a7cf25f0f9bd", + "name": "Worker requiring a GPU", + "slug": "worker-gpu", + "type": "7af4b709-efd8-4882-b6be-6b408ebf9b2d", "description": "", - "repository": "e3f470b1-fc46-4bc1-8117-30e1c62962b7", + "repository": "a7c6b0d5-d68c-48f7-a07b-09e0522a4d5c", "public": false, "archived": null } }, { "model": 
"process.worker", - "pk": "667a82ac-a19a-4e97-9239-35a3026246cb", + "pk": "af68bb34-5eaa-4edc-81fa-4400af06a715", "fields": { "name": "Custom worker", "slug": "custom", - "type": "64a618df-09d0-47eb-b999-02ceb0a114a4", + "type": "7fedc658-ecaa-4727-8844-71663ec6d0a9", "description": "", "repository": null, "public": false, @@ -181,128 +181,130 @@ }, { "model": "process.worker", - "pk": "9775f6a1-6238-4943-b245-b69bde621912", + "pk": "bc503daa-b53f-45ef-9d90-7f6a24d028a1", "fields": { - "name": "Recognizer", - "slug": "reco", - "type": "50729501-d9c4-473d-803f-a7cf25f0f9bd", + "name": "Document layout analyser", + "slug": "dla", + "type": "26ca87e4-afb3-4e0f-bc1a-b0641c7dd427", "description": "", - "repository": "e3f470b1-fc46-4bc1-8117-30e1c62962b7", + "repository": "a7c6b0d5-d68c-48f7-a07b-09e0522a4d5c", "public": false, "archived": null } }, { "model": "process.worker", - "pk": "df6dbbb2-0526-46f9-8da2-5ada8f93826a", + "pk": "d757e5ad-a03e-4e81-bf81-0b2a8719220e", "fields": { "name": "File import", "slug": "file_import", - "type": "3a0b2c85-f53f-4ce7-942d-37c08f356880", + "type": "f2a53f25-1ffe-423e-b9c4-ee5a01a2b5a0", "description": "", - "repository": "e3f470b1-fc46-4bc1-8117-30e1c62962b7", + "repository": "a7c6b0d5-d68c-48f7-a07b-09e0522a4d5c", "public": false, "archived": null } }, { "model": "process.workertype", - "pk": "20d3065e-0ef7-428b-888f-177f59e6ddd0", + "pk": "26ca87e4-afb3-4e0f-bc1a-b0641c7dd427", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "slug": "worker", - "display_name": "Worker requiring a GPU" + "slug": "dla", + "display_name": "Document Layout Analysis" } }, { "model": "process.workertype", - "pk": "3a0b2c85-f53f-4ce7-942d-37c08f356880", + "pk": "7af4b709-efd8-4882-b6be-6b408ebf9b2d", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "slug": "import", - "display_name": "Import" + "slug": "worker", + "display_name": "Worker requiring a GPU" } }, { 
"model": "process.workertype", - "pk": "50729501-d9c4-473d-803f-a7cf25f0f9bd", + "pk": "7fedc658-ecaa-4727-8844-71663ec6d0a9", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "slug": "recognizer", - "display_name": "Recognizer" + "slug": "custom", + "display_name": "Custom" } }, { "model": "process.workertype", - "pk": "64a618df-09d0-47eb-b999-02ceb0a114a4", + "pk": "ab7333fc-34e8-4fb6-a3bb-d733443cbe51", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "slug": "custom", - "display_name": "Custom" + "slug": "recognizer", + "display_name": "Recognizer" } }, { "model": "process.workertype", - "pk": "e84d4893-ae73-4162-acb6-13d606587644", + "pk": "f2a53f25-1ffe-423e-b9c4-ee5a01a2b5a0", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "slug": "dla", - "display_name": "Document Layout Analysis" + "slug": "import", + "display_name": "Import" } }, { "model": "process.workerversion", - "pk": "229fc630-728b-421b-a2de-9618bd8e401f", + "pk": "148946e9-5ddd-42d2-ad29-bf78ca55f62c", "fields": { - "worker": "667a82ac-a19a-4e97-9239-35a3026246cb", - "revision": null, - "version": 1, + "worker": "8a27660c-f976-4550-834c-84d4ad033214", + "revision": "179c0b17-cfa7-4157-bba0-83c83d58ee62", + "version": null, "configuration": { - "custom": "value" + "test": 42 }, - "state": "created", - "gpu_usage": "disabled", + "state": "available", + "gpu_usage": "required", "model_usage": "disabled", "docker_image": null, - "docker_image_iid": null, + "docker_image_iid": "registry.somewhere.com/something:latest", "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z" } }, { "model": "process.workerversion", - "pk": "57cd018c-43e2-491e-9684-3b939df52921", + "pk": "194b33ea-4a45-4884-9a89-dd04c5a65fc3", "fields": { - "worker": "df6dbbb2-0526-46f9-8da2-5ada8f93826a", - "revision": "2d087aa1-82ba-4ec5-af14-c19e4213f913", - "version": null, - 
"configuration": {}, - "state": "available", + "worker": "af68bb34-5eaa-4edc-81fa-4400af06a715", + "revision": null, + "version": 1, + "configuration": { + "custom": "value" + }, + "state": "created", "gpu_usage": "disabled", "model_usage": "disabled", "docker_image": null, - "docker_image_iid": "registry.somewhere.com/something:latest", + "docker_image_iid": null, "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z" } }, { "model": "process.workerversion", - "pk": "665fdbad-798b-434b-9ab0-95f7522c19d6", + "pk": "505988ea-21cb-4011-ad87-48bdab37237d", "fields": { - "worker": "1a18dc1d-49e3-4724-b1e3-cbc8da483610", - "revision": "2d087aa1-82ba-4ec5-af14-c19e4213f913", + "worker": "7290f39e-fa17-4e7f-a051-a0e573c3ff6b", + "revision": "179c0b17-cfa7-4157-bba0-83c83d58ee62", "version": null, "configuration": { "test": 42 }, "state": "available", - "gpu_usage": "required", + "gpu_usage": "disabled", "model_usage": "disabled", "docker_image": null, "docker_image_iid": "registry.somewhere.com/something:latest", @@ -312,10 +314,10 @@ }, { "model": "process.workerversion", - "pk": "88c64ea9-46e0-46fa-a65f-37eeaed3ded0", + "pk": "6ee6d026-1ea4-4435-98a1-a811aed6304e", "fields": { - "worker": "219444d1-2a84-49f4-94d6-d5c21e36fcb9", - "revision": "2d087aa1-82ba-4ec5-af14-c19e4213f913", + "worker": "7c61dba0-4fdc-494f-b587-abddc75b8dbb", + "revision": "179c0b17-cfa7-4157-bba0-83c83d58ee62", "version": null, "configuration": { "test": 42 @@ -331,10 +333,10 @@ }, { "model": "process.workerversion", - "pk": "928cfe44-77bc-4cb1-84d2-f02006bd61d5", + "pk": "9da1ac9d-0ff7-44fc-923f-3076a5bead35", "fields": { - "worker": "12b2344f-4871-40ad-aa1b-efaa70b5823f", - "revision": "2d087aa1-82ba-4ec5-af14-c19e4213f913", + "worker": "bc503daa-b53f-45ef-9d90-7f6a24d028a1", + "revision": "179c0b17-cfa7-4157-bba0-83c83d58ee62", "version": null, "configuration": { "test": 42 @@ -350,14 +352,12 @@ }, { "model": "process.workerversion", - "pk": 
"f62b2301-0447-4848-9848-a731ad801d60", + "pk": "d822aa98-530d-46a9-b82b-d64eeb4668a6", "fields": { - "worker": "9775f6a1-6238-4943-b245-b69bde621912", - "revision": "2d087aa1-82ba-4ec5-af14-c19e4213f913", + "worker": "d757e5ad-a03e-4e81-bf81-0b2a8719220e", + "revision": "179c0b17-cfa7-4157-bba0-83c83d58ee62", "version": null, - "configuration": { - "test": 42 - }, + "configuration": {}, "state": "available", "gpu_usage": "disabled", "model_usage": "disabled", @@ -369,10 +369,10 @@ }, { "model": "process.workerrun", - "pk": "14356945-e3a4-4d8b-b9b2-b4bc5e9287cc", + "pk": "2dee3bf1-8c3a-4999-ae74-6801a141eea5", "fields": { - "process": "76506eee-43ab-4caa-966c-9e8e5d10ef93", - "version": "229fc630-728b-421b-a2de-9618bd8e401f", + "process": "fb8b6046-9aa6-454d-8e0b-472f93605016", + "version": "194b33ea-4a45-4884-9a89-dd04c5a65fc3", "model_version": null, "parents": "[]", "configuration": null, @@ -384,14 +384,14 @@ }, { "model": "process.workerrun", - "pk": "2f828b7f-c169-4a8e-9555-0b4060e05641", + "pk": "54d16192-ce9d-4fad-9e4b-b5692787a03a", "fields": { - "process": "7a6fed8c-ed9c-4714-8036-7048462ce0f2", - "version": "928cfe44-77bc-4cb1-84d2-f02006bd61d5", + "process": "6fb4ded9-ab97-4764-b0b0-4a7040556cde", + "version": "9da1ac9d-0ff7-44fc-923f-3076a5bead35", "model_version": null, "parents": "[]", "configuration": null, - "summary": "Worker Document layout analyser @ 928cfe", + "summary": "Worker Document layout analyser @ 9da1ac", "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", "has_results": false @@ -399,14 +399,14 @@ }, { "model": "process.workerrun", - "pk": "25e12096-d55e-4137-a954-3c344fc244bd", + "pk": "c2f94771-d640-4c59-9920-a602c85fba61", "fields": { - "process": "7a6fed8c-ed9c-4714-8036-7048462ce0f2", - "version": "f62b2301-0447-4848-9848-a731ad801d60", + "process": "6fb4ded9-ab97-4764-b0b0-4a7040556cde", + "version": "505988ea-21cb-4011-ad87-48bdab37237d", "model_version": null, - "parents": 
"[\"2f828b7f-c169-4a8e-9555-0b4060e05641\"]", + "parents": "[\"54d16192-ce9d-4fad-9e4b-b5692787a03a\"]", "configuration": null, - "summary": "Worker Recognizer @ f62b23", + "summary": "Worker Recognizer @ 505988", "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", "has_results": false @@ -414,10 +414,10 @@ }, { "model": "process.workerrun", - "pk": "c2eb8dbb-584e-460a-8f0d-b6a7dc79f838", + "pk": "f3246aec-23a7-4127-8368-5bec73b3e7cc", "fields": { - "process": "b6b7dbcb-e134-4274-93fd-9d6d06818c6f", - "version": "229fc630-728b-421b-a2de-9618bd8e401f", + "process": "89fbd695-bd87-4a1d-afef-8647a30d3ccc", + "version": "194b33ea-4a45-4884-9a89-dd04c5a65fc3", "model_version": null, "parents": "[]", "configuration": null, @@ -429,7 +429,7 @@ }, { "model": "documents.corpus", - "pk": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", + "pk": "35d72d0a-7d7a-4165-ad65-a863725a470f", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", @@ -442,9 +442,9 @@ }, { "model": "documents.elementtype", - "pk": "329625fb-1514-492a-9567-e231f7646648", + "pk": "264b7ce8-9232-49db-bda8-6c081c6817a0", "fields": { - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", "slug": "page", "display_name": "Page", "folder": false, @@ -454,9 +454,9 @@ }, { "model": "documents.elementtype", - "pk": "ab1c133b-71d0-4928-b388-7c12345a8416", + "pk": "81ac6918-ad4b-4b0d-a9e2-b6c3b4e2421f", "fields": { - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", "slug": "text_line", "display_name": "Line", "folder": false, @@ -466,9 +466,9 @@ }, { "model": "documents.elementtype", - "pk": "c196f23a-6843-48e1-a885-c7cda27e64ca", + "pk": "879b6e87-d531-4f61-a5fd-a943f6458e4f", "fields": { - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", "slug": "act", "display_name": "Act", "folder": false, @@ -478,9 +478,9 @@ 
}, { "model": "documents.elementtype", - "pk": "d15930a5-ebb8-40aa-968d-8bd2cf504230", + "pk": "917c264f-4b42-4335-a21f-9518056a7915", "fields": { - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", "slug": "word", "display_name": "Word", "folder": false, @@ -490,9 +490,9 @@ }, { "model": "documents.elementtype", - "pk": "e28100cb-49a4-4837-a9ae-81a519ebb43f", + "pk": "9c789715-ee58-4259-af53-6a83d5448223", "fields": { - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", "slug": "volume", "display_name": "Volume", "folder": true, @@ -502,9 +502,9 @@ }, { "model": "documents.elementtype", - "pk": "efc85dfd-913e-4979-bb60-d6bf0c7dcd2d", + "pk": "fa2c1395-9f8d-498e-84c4-db996ec47e03", "fields": { - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", "slug": "surface", "display_name": "Surface", "folder": false, @@ -514,278 +514,278 @@ }, { "model": "documents.elementpath", - "pk": "06ab62e0-e9e7-4353-967d-a798806f7e8a", + "pk": "035f294c-06c7-4acf-b3af-2fdff88efdaf", "fields": { - "element": "9fc5e0a3-9afb-493e-bf0f-fc2998ba4ba4", - "path": "[\"1d4b82fe-f9c5-4012-a9ae-c2ff83c9cc02\"]", - "ordering": 0 + "element": "f531f9a8-b826-4e87-bad3-edcd17cd435f", + "path": "[\"49b598b6-07a5-468f-aac0-e94a4ad06a6d\", \"700c1fd5-1f67-4180-aea0-9d6533df44a4\"]", + "ordering": 1 } }, { "model": "documents.elementpath", - "pk": "156e125d-4550-4a72-a124-3b5e7fe25e77", + "pk": "04d6c275-9b31-4af2-8dfe-0065e7527403", "fields": { - "element": "c8e488da-28d8-467e-a651-7d6f2ad6600d", - "path": "[\"2cd80ca1-7d55-4750-8c90-2b693e14a058\"]", - "ordering": 2 + "element": "17517c85-cf36-4841-9577-391c8b343914", + "path": "[\"49b598b6-07a5-468f-aac0-e94a4ad06a6d\", \"eedc30d8-ef94-40f6-843e-1900d15e47de\"]", + "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "17526830-1f55-41e1-82c5-6faf34831a0d", + "pk": 
"0907474b-202a-4e14-8b4f-3614a17be95c", "fields": { - "element": "7ea6161f-62f5-4bdf-9412-bbc4d13b3e6f", - "path": "[\"1d4b82fe-f9c5-4012-a9ae-c2ff83c9cc02\", \"d9744e02-66ef-424e-a71c-88b94b209688\"]", - "ordering": 0 + "element": "f20e8dd9-285b-4e9c-ba5c-4f63cdb2aaf8", + "path": "[\"49b598b6-07a5-468f-aac0-e94a4ad06a6d\", \"700c1fd5-1f67-4180-aea0-9d6533df44a4\"]", + "ordering": 2 } }, { "model": "documents.elementpath", - "pk": "210c3ff2-1aac-4b1d-bcff-67a76b2bc4f9", + "pk": "0b627a4d-277c-4448-a21e-07a47f2ba855", "fields": { - "element": "e7027313-7910-44b4-bdff-28e8d187c0b4", - "path": "[\"1d4b82fe-f9c5-4012-a9ae-c2ff83c9cc02\"]", - "ordering": 5 + "element": "49b598b6-07a5-468f-aac0-e94a4ad06a6d", + "path": "[]", + "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "21af7d64-10db-43f4-a0dd-386be73314cd", + "pk": "12c39026-aa6b-4d71-b3e9-94fef1256ef7", "fields": { - "element": "2df1881f-d666-43c9-8af7-507b277c4e23", - "path": "[\"1d4b82fe-f9c5-4012-a9ae-c2ff83c9cc02\", \"02a6f67e-c7d4-4dec-b3a0-9f9ab3f2ee99\"]", + "element": "2dbf8cc9-cc10-4604-8eb5-de5f3718aa17", + "path": "[\"49b598b6-07a5-468f-aac0-e94a4ad06a6d\"]", "ordering": 1 } }, { "model": "documents.elementpath", - "pk": "262709fc-3d1f-46e2-8ba5-5c63b8d2b96f", + "pk": "13b02e99-d065-4b34-92fb-33a653d58c13", "fields": { - "element": "d9744e02-66ef-424e-a71c-88b94b209688", - "path": "[\"1d4b82fe-f9c5-4012-a9ae-c2ff83c9cc02\"]", - "ordering": 7 + "element": "1c02b4cc-858a-47bc-abaf-2549df1ef749", + "path": "[\"49b598b6-07a5-468f-aac0-e94a4ad06a6d\", \"a433b9be-bf1d-4abd-840d-68401750c442\"]", + "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "32644adb-c33b-47bd-befe-f5d2142d60ee", + "pk": "2c99fbb0-fde9-4e4f-ae87-c1e74e34edf9", "fields": { - "element": "ec742315-a1e6-4cf2-96c0-abe2b0e0e3fc", - "path": "[\"1d4b82fe-f9c5-4012-a9ae-c2ff83c9cc02\", \"9fc5e0a3-9afb-493e-bf0f-fc2998ba4ba4\"]", - "ordering": 2 + "element": "700c1fd5-1f67-4180-aea0-9d6533df44a4", + "path": 
"[\"49b598b6-07a5-468f-aac0-e94a4ad06a6d\"]", + "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "3a86e5d2-3c7c-474f-82b1-60dae09a82b4", + "pk": "493be254-9117-474d-9a1a-16c7ed4c70d2", "fields": { - "element": "dc66eac3-a4e9-4c55-a4a8-d239bd79f3f6", - "path": "[\"2cd80ca1-7d55-4750-8c90-2b693e14a058\"]", - "ordering": 1 + "element": "a9a1a2ea-72bd-4649-9e12-e922884bd0e0", + "path": "[\"49b598b6-07a5-468f-aac0-e94a4ad06a6d\", \"7d480460-0ab0-4471-a445-f72f2be75b9a\"]", + "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "5a8ae44a-8e2c-4eed-a1f1-c7ddcd4a093d", + "pk": "64a5fefb-5132-48ee-bad9-03eadcb30b68", "fields": { - "element": "b8d9cf10-dc77-4a41-90f8-badf9718ebb9", - "path": "[\"1d4b82fe-f9c5-4012-a9ae-c2ff83c9cc02\", \"f530bd2f-d0ab-4f14-9d98-087486c4b1ab\"]", + "element": "173b6549-a747-446d-b073-567ec53f4a10", + "path": "[\"99777763-3b04-4a00-a6bc-ffdfe840243f\"]", "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "650ef880-9d63-4164-8d50-b7dff16ca738", + "pk": "7e5c2329-ccd5-45c3-9192-b5815a35de00", "fields": { - "element": "aec1257f-bcbe-41d5-8983-966da539e912", - "path": "[\"1d4b82fe-f9c5-4012-a9ae-c2ff83c9cc02\", \"e7027313-7910-44b4-bdff-28e8d187c0b4\"]", - "ordering": 0 + "element": "fc21b00e-fe72-4be3-83f7-beb2b775f0a3", + "path": "[\"49b598b6-07a5-468f-aac0-e94a4ad06a6d\", \"21e28831-aac0-4e11-b44a-4bcbd0f4ddf7\"]", + "ordering": 1 } }, { "model": "documents.elementpath", - "pk": "666416c1-5aa0-4f57-a21f-b7585b10efd2", + "pk": "88c8f446-d600-4750-9ebc-c1514b67a645", "fields": { - "element": "f75ca797-0067-4711-b899-6ed39b0a6f1a", - "path": "[\"1d4b82fe-f9c5-4012-a9ae-c2ff83c9cc02\"]", - "ordering": 4 + "element": "a433b9be-bf1d-4abd-840d-68401750c442", + "path": "[\"49b598b6-07a5-468f-aac0-e94a4ad06a6d\"]", + "ordering": 3 } }, { "model": "documents.elementpath", - "pk": "6f42e63d-d4ba-495b-8e22-e059577c669f", + "pk": "9d405bca-c0c0-4ce2-ad98-b4ff2c292aaf", "fields": { - "element": 
"f530bd2f-d0ab-4f14-9d98-087486c4b1ab", - "path": "[\"1d4b82fe-f9c5-4012-a9ae-c2ff83c9cc02\"]", - "ordering": 1 + "element": "2b49f87c-e879-48b1-a7c2-9f173f62baf5", + "path": "[\"49b598b6-07a5-468f-aac0-e94a4ad06a6d\", \"700c1fd5-1f67-4180-aea0-9d6533df44a4\"]", + "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "72864cf7-e163-47c5-9a40-49a253e8cece", + "pk": "a394a82c-8ee9-4fce-a1c4-3b4475d85477", "fields": { - "element": "39210eb6-70a9-4660-8a45-5c6957508936", - "path": "[\"1d4b82fe-f9c5-4012-a9ae-c2ff83c9cc02\"]", - "ordering": 3 + "element": "17419d59-90af-4e7f-8052-5425e5444e80", + "path": "[\"49b598b6-07a5-468f-aac0-e94a4ad06a6d\", \"2dbf8cc9-cc10-4604-8eb5-de5f3718aa17\"]", + "ordering": 1 } }, { "model": "documents.elementpath", - "pk": "733ddae3-62e2-433a-b322-41bb6502ac93", + "pk": "a6089453-c63c-49a8-8a9d-fc05eac6d7c8", "fields": { - "element": "34cd2540-0a05-4812-add5-f24c08d46f73", - "path": "[\"1d4b82fe-f9c5-4012-a9ae-c2ff83c9cc02\", \"1d0f7ea6-6455-4a8d-ade2-a853f8ab7e08\"]", - "ordering": 0 + "element": "7841e9f9-f71f-4582-8cca-844cb55210be", + "path": "[\"49b598b6-07a5-468f-aac0-e94a4ad06a6d\", \"93c849a1-9e56-4fc8-a3a7-14200103482c\"]", + "ordering": 2 } }, { "model": "documents.elementpath", - "pk": "79ecb1ed-8036-4e88-a659-605fb3b5c8aa", + "pk": "a788c955-06f6-404d-a48c-4526089a6589", "fields": { - "element": "1d4b82fe-f9c5-4012-a9ae-c2ff83c9cc02", - "path": "[]", - "ordering": 0 + "element": "7f3e6a59-1bf6-4236-afba-dcecbfb38088", + "path": "[\"49b598b6-07a5-468f-aac0-e94a4ad06a6d\", \"700c1fd5-1f67-4180-aea0-9d6533df44a4\"]", + "ordering": 3 } }, { "model": "documents.elementpath", - "pk": "7d21ba65-6fc5-47db-995c-b4d8e8520c7c", + "pk": "b2cfb1dd-4d11-4b9e-b757-03ce49f20d10", "fields": { - "element": "9754d5ef-2888-4083-97b0-c9593321d8d1", - "path": "[\"1d4b82fe-f9c5-4012-a9ae-c2ff83c9cc02\", \"9fc5e0a3-9afb-493e-bf0f-fc2998ba4ba4\"]", - "ordering": 3 + "element": "580235a9-4050-45f2-b194-2c62ef69051e", + "path": 
"[\"49b598b6-07a5-468f-aac0-e94a4ad06a6d\", \"93c849a1-9e56-4fc8-a3a7-14200103482c\"]", + "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "88d64d5e-3fba-450d-a9a8-5c7424559bbf", + "pk": "b30f2781-b3e3-4e13-b767-2dc354752772", "fields": { - "element": "2039b7b8-15cb-4b1f-a9e6-c681aa20e13a", - "path": "[\"1d4b82fe-f9c5-4012-a9ae-c2ff83c9cc02\", \"f530bd2f-d0ab-4f14-9d98-087486c4b1ab\"]", + "element": "93c849a1-9e56-4fc8-a3a7-14200103482c", + "path": "[\"49b598b6-07a5-468f-aac0-e94a4ad06a6d\"]", "ordering": 2 } }, { "model": "documents.elementpath", - "pk": "968de6aa-0d1c-47cc-ad94-02c2b59a2d79", + "pk": "b47d5414-1cbe-46f7-9a8c-7e11bcd5f5b8", "fields": { - "element": "9b478489-dd49-431b-8c51-f78ba7699f30", - "path": "[\"1d4b82fe-f9c5-4012-a9ae-c2ff83c9cc02\", \"f75ca797-0067-4711-b899-6ed39b0a6f1a\"]", - "ordering": 0 + "element": "7d480460-0ab0-4471-a445-f72f2be75b9a", + "path": "[\"49b598b6-07a5-468f-aac0-e94a4ad06a6d\"]", + "ordering": 6 } }, { "model": "documents.elementpath", - "pk": "9bdf9f6d-b114-443e-86fb-592a0ad362af", + "pk": "c1022f9a-8b8f-4c9c-abc6-0742b78be4cf", "fields": { - "element": "e9aa6eb2-eb0b-4503-9bbf-c3d27b9cce5f", - "path": "[\"1d4b82fe-f9c5-4012-a9ae-c2ff83c9cc02\", \"9fc5e0a3-9afb-493e-bf0f-fc2998ba4ba4\"]", - "ordering": 1 + "element": "ea1947f6-dc20-447f-87f3-c46ef20cb3f1", + "path": "[\"49b598b6-07a5-468f-aac0-e94a4ad06a6d\", \"d23ebc38-5318-46fc-bc1a-62466806723d\"]", + "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "9c452d82-87c2-406c-add5-2556559e14f5", + "pk": "c724d4e4-e4df-48ab-9e19-851ac25674be", "fields": { - "element": "d82d4181-3ec4-45c2-b2cc-825d1382274f", - "path": "[\"1d4b82fe-f9c5-4012-a9ae-c2ff83c9cc02\", \"f75ca797-0067-4711-b899-6ed39b0a6f1a\"]", + "element": "786a05b3-28db-45f9-9386-d86e4bd2370e", + "path": "[\"99777763-3b04-4a00-a6bc-ffdfe840243f\"]", "ordering": 1 } }, { "model": "documents.elementpath", - "pk": "a599aafe-89d3-4589-8c6c-e20bf9f32efa", + "pk": 
"c8d274f4-fef2-4cbf-b6e2-455024b6b288", "fields": { - "element": "02a6f67e-c7d4-4dec-b3a0-9f9ab3f2ee99", - "path": "[\"1d4b82fe-f9c5-4012-a9ae-c2ff83c9cc02\"]", - "ordering": 2 + "element": "599a7d5a-63c5-4695-b6e3-8b164fbc9ebb", + "path": "[\"49b598b6-07a5-468f-aac0-e94a4ad06a6d\", \"2dbf8cc9-cc10-4604-8eb5-de5f3718aa17\"]", + "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "b6ddde5c-f6de-433b-b9c3-c9afa288216f", + "pk": "c9a3c855-9735-4e0d-916c-573bf789a142", "fields": { - "element": "1d0f7ea6-6455-4a8d-ade2-a853f8ab7e08", - "path": "[\"1d4b82fe-f9c5-4012-a9ae-c2ff83c9cc02\"]", - "ordering": 6 + "element": "99777763-3b04-4a00-a6bc-ffdfe840243f", + "path": "[]", + "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "c0f23fcf-1c63-4af2-b8b2-af0865b6dd9e", + "pk": "ca75fe24-7090-438d-a822-20a228987cc3", "fields": { - "element": "2e8101c4-6fca-49b6-9b7b-41554facfa01", - "path": "[\"1d4b82fe-f9c5-4012-a9ae-c2ff83c9cc02\", \"02a6f67e-c7d4-4dec-b3a0-9f9ab3f2ee99\"]", - "ordering": 2 + "element": "d23ebc38-5318-46fc-bc1a-62466806723d", + "path": "[\"49b598b6-07a5-468f-aac0-e94a4ad06a6d\"]", + "ordering": 5 } }, { "model": "documents.elementpath", - "pk": "c614c8aa-de22-477f-bdb1-ef83ce7ab055", + "pk": "cc426fcb-7bf6-42da-b4d1-ed6aa49296cf", "fields": { - "element": "4fed0e6a-9e3f-4e59-9c70-c5e74cc6d73c", - "path": "[\"1d4b82fe-f9c5-4012-a9ae-c2ff83c9cc02\", \"02a6f67e-c7d4-4dec-b3a0-9f9ab3f2ee99\"]", - "ordering": 0 + "element": "236b450d-874b-40c9-9945-a7307a3be8d9", + "path": "[\"99777763-3b04-4a00-a6bc-ffdfe840243f\"]", + "ordering": 2 } }, { "model": "documents.elementpath", - "pk": "c8bc7413-c96b-4047-8782-87a37cfc520b", + "pk": "d1a618e3-a489-479d-ba73-22f111f8e94b", "fields": { - "element": "344f6b5b-b001-4084-950c-1925fdd2eef4", - "path": "[\"1d4b82fe-f9c5-4012-a9ae-c2ff83c9cc02\", \"9fc5e0a3-9afb-493e-bf0f-fc2998ba4ba4\"]", + "element": "c31953e0-1723-45ba-9fd2-4e06611b164a", + "path": "[\"49b598b6-07a5-468f-aac0-e94a4ad06a6d\", 
\"21e28831-aac0-4e11-b44a-4bcbd0f4ddf7\"]", "ordering": 0 } }, { "model": "documents.elementpath", - "pk": "d06e1ff9-6dd5-4808-a035-bb2a0e7bd428", + "pk": "d2e92f0f-0551-4e12-8fc5-d2d85f9b458c", "fields": { - "element": "2cd80ca1-7d55-4750-8c90-2b693e14a058", - "path": "[]", - "ordering": 0 + "element": "eedc30d8-ef94-40f6-843e-1900d15e47de", + "path": "[\"49b598b6-07a5-468f-aac0-e94a4ad06a6d\"]", + "ordering": 7 } }, { "model": "documents.elementpath", - "pk": "d14e67c9-f5a9-4c81-ae3d-64599eb166a4", + "pk": "dab68042-3da7-4bec-8a52-ae8fce056ca2", "fields": { - "element": "b3c0b2bb-f53e-401a-ae19-bc7f2ec31179", - "path": "[\"1d4b82fe-f9c5-4012-a9ae-c2ff83c9cc02\", \"39210eb6-70a9-4660-8a45-5c6957508936\"]", - "ordering": 0 + "element": "642de90b-27a0-4eb3-8742-000faa53f765", + "path": "[\"49b598b6-07a5-468f-aac0-e94a4ad06a6d\", \"93c849a1-9e56-4fc8-a3a7-14200103482c\"]", + "ordering": 1 } }, { "model": "documents.elementpath", - "pk": "e3be0287-6213-4588-9407-6407c0de1d72", + "pk": "dc2c3dde-fab6-4db7-8fec-cffe85ed4564", "fields": { - "element": "9c42123c-b940-4695-b083-5b19392f68a6", - "path": "[\"2cd80ca1-7d55-4750-8c90-2b693e14a058\"]", - "ordering": 0 + "element": "43f1b64e-44a9-4648-a46c-0b605b420bd7", + "path": "[\"49b598b6-07a5-468f-aac0-e94a4ad06a6d\", \"2dbf8cc9-cc10-4604-8eb5-de5f3718aa17\"]", + "ordering": 2 } }, { "model": "documents.elementpath", - "pk": "f4e4b076-bbfc-4c4e-90d2-060f1c0f3c89", + "pk": "f6f57c2b-3d4b-4e80-b320-d4d7acc38935", "fields": { - "element": "86304b47-3d6a-48dd-a903-d0e09cde91ba", - "path": "[\"1d4b82fe-f9c5-4012-a9ae-c2ff83c9cc02\", \"f530bd2f-d0ab-4f14-9d98-087486c4b1ab\"]", - "ordering": 1 + "element": "21e28831-aac0-4e11-b44a-4bcbd0f4ddf7", + "path": "[\"49b598b6-07a5-468f-aac0-e94a4ad06a6d\"]", + "ordering": 4 } }, { "model": "documents.element", - "pk": "02a6f67e-c7d4-4dec-b3a0-9f9ab3f2ee99", + "pk": "173b6549-a747-446d-b073-567ec53f4a10", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": 
"2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "329625fb-1514-492a-9567-e231f7646648", - "name": "Volume 1, page 2r", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "264b7ce8-9232-49db-bda8-6c081c6817a0", + "name": "Volume 2, page 1r", "creator": null, "worker_version": null, "worker_run": null, - "image": "a38b69a3-a4f8-4dfc-a6b9-38ea2e52c713", + "image": "0fceb46b-af14-4568-984e-ac26ceb06fa4", "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)", "rotation_angle": 0, "mirrored": false, @@ -794,18 +794,18 @@ }, { "model": "documents.element", - "pk": "1d0f7ea6-6455-4a8d-ade2-a853f8ab7e08", + "pk": "17419d59-90af-4e7f-8052-5425e5444e80", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "c196f23a-6843-48e1-a885-c7cda27e64ca", - "name": "Act 4", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "917c264f-4b42-4335-a21f-9518056a7915", + "name": "ROY", "creator": null, "worker_version": null, "worker_run": null, - "image": null, - "polygon": null, + "image": "a4fef864-b712-41df-97cf-29b774450df8", + "polygon": "LINEARRING (400 400, 400 500, 500 500, 500 400, 400 400)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -813,18 +813,18 @@ }, { "model": "documents.element", - "pk": "1d4b82fe-f9c5-4012-a9ae-c2ff83c9cc02", + "pk": "17517c85-cf36-4841-9577-391c8b343914", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "e28100cb-49a4-4837-a9ae-81a519ebb43f", - "name": "Volume 1", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "fa2c1395-9f8d-498e-84c4-db996ec47e03", + "name": "Surface F", "creator": null, "worker_version": null, "worker_run": null, - "image": null, - "polygon": null, + "image": "e48347c1-2047-4e9a-b567-52a9e7b71ac6", + "polygon": "LINEARRING (600 600, 
600 1000, 1000 1000, 1000 600, 600 600)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -832,18 +832,18 @@ }, { "model": "documents.element", - "pk": "2039b7b8-15cb-4b1f-a9e6-c681aa20e13a", + "pk": "1c02b4cc-858a-47bc-abaf-2549df1ef749", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "d15930a5-ebb8-40aa-968d-8bd2cf504230", - "name": "DATUM", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "fa2c1395-9f8d-498e-84c4-db996ec47e03", + "name": "Surface A", "creator": null, "worker_version": null, "worker_run": null, - "image": "f5ddf7e0-3dd3-4f4f-ada6-f42de7be869d", - "polygon": "LINEARRING (700 700, 700 800, 800 800, 800 700, 700 700)", + "image": "bd3c8ee4-358a-4a50-90f6-1080b910d615", + "polygon": "LINEARRING (0 0, 0 600, 600 600, 600 0, 0 0)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -851,13 +851,13 @@ }, { "model": "documents.element", - "pk": "2cd80ca1-7d55-4750-8c90-2b693e14a058", + "pk": "21e28831-aac0-4e11-b44a-4bcbd0f4ddf7", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "e28100cb-49a4-4837-a9ae-81a519ebb43f", - "name": "Volume 2", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "879b6e87-d531-4f61-a5fd-a943f6458e4f", + "name": "Act 2", "creator": null, "worker_version": null, "worker_run": null, @@ -870,18 +870,18 @@ }, { "model": "documents.element", - "pk": "2df1881f-d666-43c9-8af7-507b277c4e23", + "pk": "236b450d-874b-40c9-9945-a7307a3be8d9", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "d15930a5-ebb8-40aa-968d-8bd2cf504230", - "name": "ROY", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "264b7ce8-9232-49db-bda8-6c081c6817a0", + "name": "Volume 2, page 2r", 
"creator": null, "worker_version": null, "worker_run": null, - "image": "a38b69a3-a4f8-4dfc-a6b9-38ea2e52c713", - "polygon": "LINEARRING (400 400, 400 500, 500 500, 500 400, 400 400)", + "image": "a43940c0-254f-4920-b725-7f063bcc0b50", + "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -889,18 +889,18 @@ }, { "model": "documents.element", - "pk": "2e8101c4-6fca-49b6-9b7b-41554facfa01", + "pk": "2b49f87c-e879-48b1-a7c2-9f173f62baf5", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "d15930a5-ebb8-40aa-968d-8bd2cf504230", - "name": "DATUM", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "917c264f-4b42-4335-a21f-9518056a7915", + "name": "PARIS", "creator": null, "worker_version": null, "worker_run": null, - "image": "a38b69a3-a4f8-4dfc-a6b9-38ea2e52c713", - "polygon": "LINEARRING (700 700, 700 800, 800 800, 800 700, 700 700)", + "image": "bd3c8ee4-358a-4a50-90f6-1080b910d615", + "polygon": "LINEARRING (100 100, 100 200, 200 200, 200 100, 100 100)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -908,18 +908,18 @@ }, { "model": "documents.element", - "pk": "344f6b5b-b001-4084-950c-1925fdd2eef4", + "pk": "2dbf8cc9-cc10-4604-8eb5-de5f3718aa17", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "d15930a5-ebb8-40aa-968d-8bd2cf504230", - "name": "PARIS", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "264b7ce8-9232-49db-bda8-6c081c6817a0", + "name": "Volume 1, page 1v", "creator": null, "worker_version": null, "worker_run": null, - "image": "f250d3f3-bd56-4a2f-bffc-fc3a33cd1902", - "polygon": "LINEARRING (100 100, 100 200, 200 200, 200 100, 100 100)", + "image": "a4fef864-b712-41df-97cf-29b774450df8", + "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 
1000 0, 0 0)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -927,18 +927,18 @@ }, { "model": "documents.element", - "pk": "34cd2540-0a05-4812-add5-f24c08d46f73", + "pk": "43f1b64e-44a9-4648-a46c-0b605b420bd7", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "efc85dfd-913e-4979-bb60-d6bf0c7dcd2d", - "name": "Surface E", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "917c264f-4b42-4335-a21f-9518056a7915", + "name": "DATUM", "creator": null, "worker_version": null, "worker_run": null, - "image": "a38b69a3-a4f8-4dfc-a6b9-38ea2e52c713", - "polygon": "LINEARRING (300 300, 300 600, 600 600, 600 300, 300 300)", + "image": "a4fef864-b712-41df-97cf-29b774450df8", + "polygon": "LINEARRING (700 700, 700 800, 800 800, 800 700, 700 700)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -946,13 +946,13 @@ }, { "model": "documents.element", - "pk": "39210eb6-70a9-4660-8a45-5c6957508936", + "pk": "49b598b6-07a5-468f-aac0-e94a4ad06a6d", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "c196f23a-6843-48e1-a885-c7cda27e64ca", - "name": "Act 1", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "9c789715-ee58-4259-af53-6a83d5448223", + "name": "Volume 1", "creator": null, "worker_version": null, "worker_run": null, @@ -965,17 +965,17 @@ }, { "model": "documents.element", - "pk": "4fed0e6a-9e3f-4e59-9c70-c5e74cc6d73c", + "pk": "580235a9-4050-45f2-b194-2c62ef69051e", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "d15930a5-ebb8-40aa-968d-8bd2cf504230", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "917c264f-4b42-4335-a21f-9518056a7915", "name": "PARIS", "creator": null, "worker_version": null, "worker_run": 
null, - "image": "a38b69a3-a4f8-4dfc-a6b9-38ea2e52c713", + "image": "e48347c1-2047-4e9a-b567-52a9e7b71ac6", "polygon": "LINEARRING (100 100, 100 200, 200 200, 200 100, 100 100)", "rotation_angle": 0, "mirrored": false, @@ -984,18 +984,18 @@ }, { "model": "documents.element", - "pk": "7ea6161f-62f5-4bdf-9412-bbc4d13b3e6f", + "pk": "599a7d5a-63c5-4695-b6e3-8b164fbc9ebb", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "efc85dfd-913e-4979-bb60-d6bf0c7dcd2d", - "name": "Surface F", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "917c264f-4b42-4335-a21f-9518056a7915", + "name": "PARIS", "creator": null, "worker_version": null, "worker_run": null, - "image": "a38b69a3-a4f8-4dfc-a6b9-38ea2e52c713", - "polygon": "LINEARRING (600 600, 600 1000, 1000 1000, 1000 600, 600 600)", + "image": "a4fef864-b712-41df-97cf-29b774450df8", + "polygon": "LINEARRING (100 100, 100 200, 200 200, 200 100, 100 100)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1003,17 +1003,17 @@ }, { "model": "documents.element", - "pk": "86304b47-3d6a-48dd-a903-d0e09cde91ba", + "pk": "642de90b-27a0-4eb3-8742-000faa53f765", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "d15930a5-ebb8-40aa-968d-8bd2cf504230", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "917c264f-4b42-4335-a21f-9518056a7915", "name": "ROY", "creator": null, "worker_version": null, "worker_run": null, - "image": "f5ddf7e0-3dd3-4f4f-ada6-f42de7be869d", + "image": "e48347c1-2047-4e9a-b567-52a9e7b71ac6", "polygon": "LINEARRING (400 400, 400 500, 500 500, 500 400, 400 400)", "rotation_angle": 0, "mirrored": false, @@ -1022,18 +1022,18 @@ }, { "model": "documents.element", - "pk": "9754d5ef-2888-4083-97b0-c9593321d8d1", + "pk": "700c1fd5-1f67-4180-aea0-9d6533df44a4", "fields": { "created": 
"2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "ab1c133b-71d0-4928-b388-7c12345a8416", - "name": "Text line", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "264b7ce8-9232-49db-bda8-6c081c6817a0", + "name": "Volume 1, page 1r", "creator": null, "worker_version": null, "worker_run": null, - "image": "f250d3f3-bd56-4a2f-bffc-fc3a33cd1902", - "polygon": "LINEARRING (400 400, 400 500, 500 500, 500 400, 400 400)", + "image": "bd3c8ee4-358a-4a50-90f6-1080b910d615", + "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1041,18 +1041,18 @@ }, { "model": "documents.element", - "pk": "9b478489-dd49-431b-8c51-f78ba7699f30", + "pk": "7841e9f9-f71f-4582-8cca-844cb55210be", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "efc85dfd-913e-4979-bb60-d6bf0c7dcd2d", - "name": "Surface B", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "917c264f-4b42-4335-a21f-9518056a7915", + "name": "DATUM", "creator": null, "worker_version": null, "worker_run": null, - "image": "f250d3f3-bd56-4a2f-bffc-fc3a33cd1902", - "polygon": "LINEARRING (600 600, 600 1000, 1000 1000, 1000 600, 600 600)", + "image": "e48347c1-2047-4e9a-b567-52a9e7b71ac6", + "polygon": "LINEARRING (700 700, 700 800, 800 800, 800 700, 700 700)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1060,17 +1060,17 @@ }, { "model": "documents.element", - "pk": "9c42123c-b940-4695-b083-5b19392f68a6", + "pk": "786a05b3-28db-45f9-9386-d86e4bd2370e", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "329625fb-1514-492a-9567-e231f7646648", - "name": "Volume 2, page 1r", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": 
"264b7ce8-9232-49db-bda8-6c081c6817a0", + "name": "Volume 2, page 1v", "creator": null, "worker_version": null, "worker_run": null, - "image": "468df2f7-e22a-4dba-96d2-9c464ba6c2b0", + "image": "7b3e5017-0c57-45da-aef8-fff14feba411", "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)", "rotation_angle": 0, "mirrored": false, @@ -1079,18 +1079,18 @@ }, { "model": "documents.element", - "pk": "9fc5e0a3-9afb-493e-bf0f-fc2998ba4ba4", + "pk": "7d480460-0ab0-4471-a445-f72f2be75b9a", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "329625fb-1514-492a-9567-e231f7646648", - "name": "Volume 1, page 1r", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "879b6e87-d531-4f61-a5fd-a943f6458e4f", + "name": "Act 4", "creator": null, "worker_version": null, "worker_run": null, - "image": "f250d3f3-bd56-4a2f-bffc-fc3a33cd1902", - "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)", + "image": null, + "polygon": null, "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1098,18 +1098,18 @@ }, { "model": "documents.element", - "pk": "aec1257f-bcbe-41d5-8983-966da539e912", + "pk": "7f3e6a59-1bf6-4236-afba-dcecbfb38088", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "efc85dfd-913e-4979-bb60-d6bf0c7dcd2d", - "name": "Surface D", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "81ac6918-ad4b-4b0d-a9e2-b6c3b4e2421f", + "name": "Text line", "creator": null, "worker_version": null, "worker_run": null, - "image": "a38b69a3-a4f8-4dfc-a6b9-38ea2e52c713", - "polygon": "LINEARRING (0 0, 0 300, 300 300, 300 0, 0 0)", + "image": "bd3c8ee4-358a-4a50-90f6-1080b910d615", + "polygon": "LINEARRING (400 400, 400 500, 500 500, 500 400, 400 400)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1117,18 +1117,18 @@ }, { "model": 
"documents.element", - "pk": "b3c0b2bb-f53e-401a-ae19-bc7f2ec31179", + "pk": "93c849a1-9e56-4fc8-a3a7-14200103482c", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "efc85dfd-913e-4979-bb60-d6bf0c7dcd2d", - "name": "Surface A", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "264b7ce8-9232-49db-bda8-6c081c6817a0", + "name": "Volume 1, page 2r", "creator": null, "worker_version": null, "worker_run": null, - "image": "f250d3f3-bd56-4a2f-bffc-fc3a33cd1902", - "polygon": "LINEARRING (0 0, 0 600, 600 600, 600 0, 0 0)", + "image": "e48347c1-2047-4e9a-b567-52a9e7b71ac6", + "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1136,18 +1136,18 @@ }, { "model": "documents.element", - "pk": "b8d9cf10-dc77-4a41-90f8-badf9718ebb9", + "pk": "99777763-3b04-4a00-a6bc-ffdfe840243f", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "d15930a5-ebb8-40aa-968d-8bd2cf504230", - "name": "PARIS", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "9c789715-ee58-4259-af53-6a83d5448223", + "name": "Volume 2", "creator": null, "worker_version": null, "worker_run": null, - "image": "f5ddf7e0-3dd3-4f4f-ada6-f42de7be869d", - "polygon": "LINEARRING (100 100, 100 200, 200 200, 200 100, 100 100)", + "image": null, + "polygon": null, "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1155,18 +1155,18 @@ }, { "model": "documents.element", - "pk": "c8e488da-28d8-467e-a651-7d6f2ad6600d", + "pk": "a433b9be-bf1d-4abd-840d-68401750c442", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "329625fb-1514-492a-9567-e231f7646648", - "name": "Volume 2, page 2r", + "corpus": 
"35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "879b6e87-d531-4f61-a5fd-a943f6458e4f", + "name": "Act 1", "creator": null, "worker_version": null, "worker_run": null, - "image": "53421855-db25-4abd-aba4-d86710a76d5d", - "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)", + "image": null, + "polygon": null, "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1174,18 +1174,18 @@ }, { "model": "documents.element", - "pk": "d82d4181-3ec4-45c2-b2cc-825d1382274f", + "pk": "a9a1a2ea-72bd-4649-9e12-e922884bd0e0", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "efc85dfd-913e-4979-bb60-d6bf0c7dcd2d", - "name": "Surface C", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "fa2c1395-9f8d-498e-84c4-db996ec47e03", + "name": "Surface E", "creator": null, "worker_version": null, "worker_run": null, - "image": "f5ddf7e0-3dd3-4f4f-ada6-f42de7be869d", - "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)", + "image": "e48347c1-2047-4e9a-b567-52a9e7b71ac6", + "polygon": "LINEARRING (300 300, 300 600, 600 600, 600 300, 300 300)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1193,18 +1193,18 @@ }, { "model": "documents.element", - "pk": "d9744e02-66ef-424e-a71c-88b94b209688", + "pk": "c31953e0-1723-45ba-9fd2-4e06611b164a", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "c196f23a-6843-48e1-a885-c7cda27e64ca", - "name": "Act 5", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "fa2c1395-9f8d-498e-84c4-db996ec47e03", + "name": "Surface B", "creator": null, "worker_version": null, "worker_run": null, - "image": null, - "polygon": null, + "image": "bd3c8ee4-358a-4a50-90f6-1080b910d615", + "polygon": "LINEARRING (600 600, 600 1000, 1000 1000, 1000 600, 600 600)", "rotation_angle": 0, "mirrored": false, 
"confidence": null @@ -1212,18 +1212,18 @@ }, { "model": "documents.element", - "pk": "dc66eac3-a4e9-4c55-a4a8-d239bd79f3f6", + "pk": "d23ebc38-5318-46fc-bc1a-62466806723d", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "329625fb-1514-492a-9567-e231f7646648", - "name": "Volume 2, page 1v", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "879b6e87-d531-4f61-a5fd-a943f6458e4f", + "name": "Act 3", "creator": null, "worker_version": null, "worker_run": null, - "image": "27c0757d-4d15-4f0e-bfc1-ec63bfada9e7", - "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)", + "image": null, + "polygon": null, "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1231,18 +1231,18 @@ }, { "model": "documents.element", - "pk": "e7027313-7910-44b4-bdff-28e8d187c0b4", + "pk": "ea1947f6-dc20-447f-87f3-c46ef20cb3f1", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "c196f23a-6843-48e1-a885-c7cda27e64ca", - "name": "Act 3", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "fa2c1395-9f8d-498e-84c4-db996ec47e03", + "name": "Surface D", "creator": null, "worker_version": null, "worker_run": null, - "image": null, - "polygon": null, + "image": "e48347c1-2047-4e9a-b567-52a9e7b71ac6", + "polygon": "LINEARRING (0 0, 0 300, 300 300, 300 0, 0 0)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1250,18 +1250,18 @@ }, { "model": "documents.element", - "pk": "e9aa6eb2-eb0b-4503-9bbf-c3d27b9cce5f", + "pk": "eedc30d8-ef94-40f6-843e-1900d15e47de", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "d15930a5-ebb8-40aa-968d-8bd2cf504230", - "name": "ROY", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": 
"879b6e87-d531-4f61-a5fd-a943f6458e4f", + "name": "Act 5", "creator": null, "worker_version": null, "worker_run": null, - "image": "f250d3f3-bd56-4a2f-bffc-fc3a33cd1902", - "polygon": "LINEARRING (400 400, 400 500, 500 500, 500 400, 400 400)", + "image": null, + "polygon": null, "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1269,17 +1269,17 @@ }, { "model": "documents.element", - "pk": "ec742315-a1e6-4cf2-96c0-abe2b0e0e3fc", + "pk": "f20e8dd9-285b-4e9c-ba5c-4f63cdb2aaf8", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "d15930a5-ebb8-40aa-968d-8bd2cf504230", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "917c264f-4b42-4335-a21f-9518056a7915", "name": "DATUM", "creator": null, "worker_version": null, "worker_run": null, - "image": "f250d3f3-bd56-4a2f-bffc-fc3a33cd1902", + "image": "bd3c8ee4-358a-4a50-90f6-1080b910d615", "polygon": "LINEARRING (700 700, 700 800, 800 800, 800 700, 700 700)", "rotation_angle": 0, "mirrored": false, @@ -1288,18 +1288,18 @@ }, { "model": "documents.element", - "pk": "f530bd2f-d0ab-4f14-9d98-087486c4b1ab", + "pk": "f531f9a8-b826-4e87-bad3-edcd17cd435f", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "329625fb-1514-492a-9567-e231f7646648", - "name": "Volume 1, page 1v", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "917c264f-4b42-4335-a21f-9518056a7915", + "name": "ROY", "creator": null, "worker_version": null, "worker_run": null, - "image": "f5ddf7e0-3dd3-4f4f-ada6-f42de7be869d", - "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)", + "image": "bd3c8ee4-358a-4a50-90f6-1080b910d615", + "polygon": "LINEARRING (400 400, 400 500, 500 500, 500 400, 400 400)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1307,18 +1307,18 @@ }, { "model": "documents.element", - 
"pk": "f75ca797-0067-4711-b899-6ed39b0a6f1a", + "pk": "fc21b00e-fe72-4be3-83f7-beb2b775f0a3", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "c196f23a-6843-48e1-a885-c7cda27e64ca", - "name": "Act 2", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "fa2c1395-9f8d-498e-84c4-db996ec47e03", + "name": "Surface C", "creator": null, "worker_version": null, "worker_run": null, - "image": null, - "polygon": null, + "image": "a4fef864-b712-41df-97cf-29b774450df8", + "polygon": "LINEARRING (0 0, 0 1000, 1000 1000, 1000 0, 0 0)", "rotation_angle": 0, "mirrored": false, "confidence": null @@ -1326,103 +1326,103 @@ }, { "model": "documents.entitytype", - "pk": "0199d0f3-a2e6-4422-9202-2be9fe6d9dff", + "pk": "01c8c770-8bc0-4383-a87b-865439cad9c6", "fields": { - "name": "number", + "name": "location", "color": "ff0000", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12" + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f" } }, { "model": "documents.entitytype", - "pk": "2b56b955-e322-4cb3-b66c-9de1ead52ead", + "pk": "932468ff-0f6a-4c87-8e42-cbdfeb4de820", "fields": { - "name": "location", + "name": "organization", "color": "ff0000", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12" + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f" } }, { "model": "documents.entitytype", - "pk": "493a7fd6-4d9b-4a42-bb1c-d4336852cf43", + "pk": "a9b5bd96-fe1e-41b0-86d5-1686b5364a95", "fields": { - "name": "organization", + "name": "person", "color": "ff0000", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12" + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f" } }, { "model": "documents.entitytype", - "pk": "7051555a-449f-4ebb-a519-f5b65517a520", + "pk": "ba17ef5d-c875-4a13-b59c-7528417a6cb7", "fields": { - "name": "person", + "name": "number", "color": "ff0000", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12" + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f" } }, { "model": 
"documents.entitytype", - "pk": "c6cb242c-688d-484f-87d1-42cabd41f67f", + "pk": "f9659fe1-0955-4b9e-acff-561a0cb7356a", "fields": { "name": "date", "color": "ff0000", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12" + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f" } }, { "model": "documents.transcription", - "pk": "05882194-0210-441a-a167-f0b58cca0d2e", + "pk": "2b15b4fd-d26b-4bce-8610-20348ad44887", "fields": { - "element": "2df1881f-d666-43c9-8af7-507b277c4e23", - "worker_version": "f62b2301-0447-4848-9848-a731ad801d60", + "element": "2b49f87c-e879-48b1-a7c2-9f173f62baf5", + "worker_version": "505988ea-21cb-4011-ad87-48bdab37237d", "worker_run": null, - "text": "ROY", + "text": "PARIS", "orientation": "horizontal-lr", "confidence": 1.0 } }, { "model": "documents.transcription", - "pk": "29dca1b6-8661-4bb5-933f-0b05ac7c30bb", + "pk": "3725b78b-efdf-432a-bc75-ae91bcb54297", "fields": { - "element": "86304b47-3d6a-48dd-a903-d0e09cde91ba", - "worker_version": "f62b2301-0447-4848-9848-a731ad801d60", + "element": "700c1fd5-1f67-4180-aea0-9d6533df44a4", + "worker_version": "505988ea-21cb-4011-ad87-48bdab37237d", "worker_run": null, - "text": "ROY", + "text": "Lorem ipsum dolor sit amet", "orientation": "horizontal-lr", "confidence": 1.0 } }, { "model": "documents.transcription", - "pk": "413a4b08-c496-4925-a440-e9edad4a9dc1", + "pk": "4a7546e6-9e00-4dc9-82ff-5ae9b8bfbfc7", "fields": { - "element": "b8d9cf10-dc77-4a41-90f8-badf9718ebb9", - "worker_version": "f62b2301-0447-4848-9848-a731ad801d60", + "element": "f20e8dd9-285b-4e9c-ba5c-4f63cdb2aaf8", + "worker_version": "505988ea-21cb-4011-ad87-48bdab37237d", "worker_run": null, - "text": "PARIS", + "text": "DATUM", "orientation": "horizontal-lr", "confidence": 1.0 } }, { "model": "documents.transcription", - "pk": "5107512b-9bda-4f59-98e0-a0ff40c76d11", + "pk": "5717edbf-0c6e-4f92-95f0-b4088a023dde", "fields": { - "element": "ec742315-a1e6-4cf2-96c0-abe2b0e0e3fc", - "worker_version": 
"f62b2301-0447-4848-9848-a731ad801d60", + "element": "599a7d5a-63c5-4695-b6e3-8b164fbc9ebb", + "worker_version": "505988ea-21cb-4011-ad87-48bdab37237d", "worker_run": null, - "text": "DATUM", + "text": "PARIS", "orientation": "horizontal-lr", "confidence": 1.0 } }, { "model": "documents.transcription", - "pk": "5b4bd0f0-63fa-43d1-970f-a79c44784392", + "pk": "5ed1f7ca-e777-4e30-86b3-4789197729be", "fields": { - "element": "344f6b5b-b001-4084-950c-1925fdd2eef4", - "worker_version": "f62b2301-0447-4848-9848-a731ad801d60", + "element": "580235a9-4050-45f2-b194-2c62ef69051e", + "worker_version": "505988ea-21cb-4011-ad87-48bdab37237d", "worker_run": null, "text": "PARIS", "orientation": "horizontal-lr", @@ -1431,22 +1431,22 @@ }, { "model": "documents.transcription", - "pk": "7e865edb-f16e-4406-a497-682e5a946593", + "pk": "6d0e6148-db3a-446a-bb16-539ea4255c30", "fields": { - "element": "9fc5e0a3-9afb-493e-bf0f-fc2998ba4ba4", - "worker_version": "f62b2301-0447-4848-9848-a731ad801d60", + "element": "7841e9f9-f71f-4582-8cca-844cb55210be", + "worker_version": "505988ea-21cb-4011-ad87-48bdab37237d", "worker_run": null, - "text": "Lorem ipsum dolor sit amet", + "text": "DATUM", "orientation": "horizontal-lr", "confidence": 1.0 } }, { "model": "documents.transcription", - "pk": "b3ec03c1-fa36-4347-b3ad-64abf63ce10c", + "pk": "7011525f-cae6-4582-b869-ebe16dd93764", "fields": { - "element": "2039b7b8-15cb-4b1f-a9e6-c681aa20e13a", - "worker_version": "f62b2301-0447-4848-9848-a731ad801d60", + "element": "43f1b64e-44a9-4648-a46c-0b605b420bd7", + "worker_version": "505988ea-21cb-4011-ad87-48bdab37237d", "worker_run": null, "text": "DATUM", "orientation": "horizontal-lr", @@ -1455,22 +1455,22 @@ }, { "model": "documents.transcription", - "pk": "bf9dddc1-c158-487c-99b4-37e0f7f44759", + "pk": "b2595600-d142-4515-a427-3b4b2dcb2d5f", "fields": { - "element": "4fed0e6a-9e3f-4e59-9c70-c5e74cc6d73c", - "worker_version": "f62b2301-0447-4848-9848-a731ad801d60", + "element": 
"f531f9a8-b826-4e87-bad3-edcd17cd435f", + "worker_version": "505988ea-21cb-4011-ad87-48bdab37237d", "worker_run": null, - "text": "PARIS", + "text": "ROY", "orientation": "horizontal-lr", "confidence": 1.0 } }, { "model": "documents.transcription", - "pk": "e1978d01-9182-4a42-a76f-7ae776aaaf55", + "pk": "c399e488-675b-49ac-bd6b-a4be34756e87", "fields": { - "element": "e9aa6eb2-eb0b-4503-9bbf-c3d27b9cce5f", - "worker_version": "f62b2301-0447-4848-9848-a731ad801d60", + "element": "642de90b-27a0-4eb3-8742-000faa53f765", + "worker_version": "505988ea-21cb-4011-ad87-48bdab37237d", "worker_run": null, "text": "ROY", "orientation": "horizontal-lr", @@ -1479,51 +1479,51 @@ }, { "model": "documents.transcription", - "pk": "f5b81d19-ebb7-412d-9deb-455d72aaf829", + "pk": "c7d484cd-3080-41aa-b24d-e51fba98ee4e", "fields": { - "element": "2e8101c4-6fca-49b6-9b7b-41554facfa01", - "worker_version": "f62b2301-0447-4848-9848-a731ad801d60", + "element": "17419d59-90af-4e7f-8052-5425e5444e80", + "worker_version": "505988ea-21cb-4011-ad87-48bdab37237d", "worker_run": null, - "text": "DATUM", + "text": "ROY", "orientation": "horizontal-lr", "confidence": 1.0 } }, { "model": "documents.allowedmetadata", - "pk": "6437fdd1-97c0-4fff-a325-039d0379bdbe", + "pk": "43948eee-60c9-42e1-affd-c72312b7f18e", "fields": { - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", "type": "text", "name": "folio" } }, { "model": "documents.allowedmetadata", - "pk": "89efa0fd-a187-4177-8de5-f3477dde921f", + "pk": "58be7836-60ef-43b8-a6ea-e4472f2c0c31", "fields": { - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": "location", - "name": "location" + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "date", + "name": "date" } }, { "model": "documents.allowedmetadata", - "pk": "ab34cc71-a15a-40d9-8cc5-f680adca898b", + "pk": "60567a85-e544-47c4-b300-774308d0a6e8", "fields": { - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", - "type": 
"date", - "name": "date" + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "type": "location", + "name": "location" } }, { "model": "documents.metadata", - "pk": "0ba82a45-7667-4673-9258-34bbb9175f10", + "pk": "11db8db6-e672-4fff-bfd6-064a30f1091f", "fields": { - "element": "1d0f7ea6-6455-4a8d-ade2-a853f8ab7e08", - "name": "number", + "element": "786a05b3-28db-45f9-9386-d86e4bd2370e", + "name": "folio", "type": "text", - "value": "4", + "value": "1v", "entity": null, "worker_version": null, "worker_run": null @@ -1531,12 +1531,12 @@ }, { "model": "documents.metadata", - "pk": "3e7bf01c-c9d6-4322-bd3f-def5438d7384", + "pk": "2127a4b5-44d7-4198-b285-45d11263a250", "fields": { - "element": "d9744e02-66ef-424e-a71c-88b94b209688", - "name": "number", + "element": "236b450d-874b-40c9-9945-a7307a3be8d9", + "name": "folio", "type": "text", - "value": "5", + "value": "2r", "entity": null, "worker_version": null, "worker_run": null @@ -1544,12 +1544,12 @@ }, { "model": "documents.metadata", - "pk": "52139b43-747a-4ffe-9f5c-cd43e0ad2b39", + "pk": "46142d6e-2716-4b13-899d-e25557674722", "fields": { - "element": "e7027313-7910-44b4-bdff-28e8d187c0b4", + "element": "a433b9be-bf1d-4abd-840d-68401750c442", "name": "number", "type": "text", - "value": "3", + "value": "1", "entity": null, "worker_version": null, "worker_run": null @@ -1557,12 +1557,12 @@ }, { "model": "documents.metadata", - "pk": "62347283-e04f-4401-a8a9-ff402766ac2d", + "pk": "6764ffd2-7a40-4fb2-aa5d-b07be43ec12a", "fields": { - "element": "c8e488da-28d8-467e-a651-7d6f2ad6600d", + "element": "700c1fd5-1f67-4180-aea0-9d6533df44a4", "name": "folio", "type": "text", - "value": "2r", + "value": "1r", "entity": null, "worker_version": null, "worker_run": null @@ -1570,12 +1570,12 @@ }, { "model": "documents.metadata", - "pk": "660e0182-f139-4a4b-b65e-b60b3051ef4c", + "pk": "6b1e5c6f-c428-4bfe-84d1-5d70579a9e7b", "fields": { - "element": "9c42123c-b940-4695-b083-5b19392f68a6", - "name": "folio", + "element": 
"7d480460-0ab0-4471-a445-f72f2be75b9a", + "name": "number", "type": "text", - "value": "1r", + "value": "4", "entity": null, "worker_version": null, "worker_run": null @@ -1583,12 +1583,12 @@ }, { "model": "documents.metadata", - "pk": "af1f94f7-b9a8-4963-9e21-d4a8c34b2627", + "pk": "7d543dfd-2e55-41fe-a396-41fd5f9aa37a", "fields": { - "element": "39210eb6-70a9-4660-8a45-5c6957508936", - "name": "number", + "element": "173b6549-a747-446d-b073-567ec53f4a10", + "name": "folio", "type": "text", - "value": "1", + "value": "1r", "entity": null, "worker_version": null, "worker_run": null @@ -1596,12 +1596,12 @@ }, { "model": "documents.metadata", - "pk": "cb325348-d43a-404f-b7f1-06c2230840c5", + "pk": "8587b8a7-9b30-4018-9295-c069a2d18662", "fields": { - "element": "f75ca797-0067-4711-b899-6ed39b0a6f1a", + "element": "eedc30d8-ef94-40f6-843e-1900d15e47de", "name": "number", "type": "text", - "value": "2", + "value": "5", "entity": null, "worker_version": null, "worker_run": null @@ -1609,9 +1609,9 @@ }, { "model": "documents.metadata", - "pk": "da44df4c-a8ac-4bdc-a10b-ab6939ac76cd", + "pk": "8ab6a879-c132-4712-a985-212c3ba5ccb9", "fields": { - "element": "dc66eac3-a4e9-4c55-a4a8-d239bd79f3f6", + "element": "2dbf8cc9-cc10-4604-8eb5-de5f3718aa17", "name": "folio", "type": "text", "value": "1v", @@ -1622,12 +1622,12 @@ }, { "model": "documents.metadata", - "pk": "f55c81b9-bc87-4086-8cb1-b3a0eea6d491", + "pk": "a85ec19b-c732-4485-9655-4b6f9ba236fe", "fields": { - "element": "f530bd2f-d0ab-4f14-9d98-087486c4b1ab", + "element": "93c849a1-9e56-4fc8-a3a7-14200103482c", "name": "folio", "type": "text", - "value": "1v", + "value": "2r", "entity": null, "worker_version": null, "worker_run": null @@ -1635,12 +1635,12 @@ }, { "model": "documents.metadata", - "pk": "fb0bedec-cc2d-424e-86bf-05239865e3fd", + "pk": "c6840a9a-5db5-45d5-aeef-e231571b499e", "fields": { - "element": "02a6f67e-c7d4-4dec-b3a0-9f9ab3f2ee99", - "name": "folio", + "element": 
"d23ebc38-5318-46fc-bc1a-62466806723d", + "name": "number", "type": "text", - "value": "2r", + "value": "3", "entity": null, "worker_version": null, "worker_run": null @@ -1648,12 +1648,12 @@ }, { "model": "documents.metadata", - "pk": "fe60529e-30d4-4824-bb87-3534a652da81", + "pk": "f8240bb3-b50b-46a6-924c-7ebf057db537", "fields": { - "element": "9fc5e0a3-9afb-493e-bf0f-fc2998ba4ba4", - "name": "folio", + "element": "21e28831-aac0-4e11-b44a-4bcbd0f4ddf7", + "name": "number", "type": "text", - "value": "1r", + "value": "2", "entity": null, "worker_version": null, "worker_run": null @@ -1676,12 +1676,12 @@ }, { "model": "images.image", - "pk": "27c0757d-4d15-4f0e-bfc1-ec63bfada9e7", + "pk": "0fceb46b-af14-4568-984e-ac26ceb06fa4", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", "server": 1, - "path": "img5", + "path": "img4", "width": 1000, "height": 1000, "hash": null, @@ -1690,12 +1690,12 @@ }, { "model": "images.image", - "pk": "468df2f7-e22a-4dba-96d2-9c464ba6c2b0", + "pk": "7b3e5017-0c57-45da-aef8-fff14feba411", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", "server": 1, - "path": "img4", + "path": "img5", "width": 1000, "height": 1000, "hash": null, @@ -1704,7 +1704,7 @@ }, { "model": "images.image", - "pk": "53421855-db25-4abd-aba4-d86710a76d5d", + "pk": "a43940c0-254f-4920-b725-7f063bcc0b50", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", @@ -1718,12 +1718,12 @@ }, { "model": "images.image", - "pk": "a38b69a3-a4f8-4dfc-a6b9-38ea2e52c713", + "pk": "a4fef864-b712-41df-97cf-29b774450df8", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", "server": 1, - "path": "img3", + "path": "img2", "width": 1000, "height": 1000, "hash": null, @@ -1732,7 +1732,7 @@ }, { "model": "images.image", - "pk": "f250d3f3-bd56-4a2f-bffc-fc3a33cd1902", + "pk": "bd3c8ee4-358a-4a50-90f6-1080b910d615", "fields": { "created": 
"2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", @@ -1746,12 +1746,12 @@ }, { "model": "images.image", - "pk": "f5ddf7e0-3dd3-4f4f-ada6-f42de7be869d", + "pk": "e48347c1-2047-4e9a-b567-52a9e7b71ac6", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", "server": 1, - "path": "img2", + "path": "img3", "width": 1000, "height": 1000, "hash": null, @@ -1760,64 +1760,64 @@ }, { "model": "users.right", - "pk": "15896320-98f2-48e1-9d36-4cef1e15c119", + "pk": "007844fb-44e9-42ff-925f-954e98292c50", "fields": { "user": 2, "group": null, - "content_type": 19, - "content_id": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", + "content_type": 34, + "content_id": "d4c0ee9b-6c8e-4667-823b-01cdbf709161", "level": 100 } }, { "model": "users.right", - "pk": "2a7d975d-4f18-41f4-964f-817aeae09582", + "pk": "3f108462-43fd-417f-adbf-f2efcae42d99", "fields": { "user": 2, "group": null, "content_type": 11, - "content_id": "395522d1-94a0-407a-b5cb-347fa68ec2c4", + "content_id": "0464874a-12cd-4bb5-a06f-37a872a82e5a", "level": 10 } }, { "model": "users.right", - "pk": "7e2ea5d2-2408-49b4-b1b4-2ea0e12da7e4", + "pk": "6c838d5b-602d-4a59-a890-e0e7b45892d3", "fields": { - "user": 2, + "user": 3, "group": null, "content_type": 34, - "content_id": "8f459bd8-0440-444e-940b-3c14b10bdb0d", - "level": 100 + "content_id": "d4c0ee9b-6c8e-4667-823b-01cdbf709161", + "level": 50 } }, { "model": "users.right", - "pk": "db08fb62-277f-475c-ae92-cfb4a515b146", + "pk": "8e0946ec-f183-4777-9b01-fad1260274d8", "fields": { "user": 4, "group": null, "content_type": 34, - "content_id": "8f459bd8-0440-444e-940b-3c14b10bdb0d", + "content_id": "d4c0ee9b-6c8e-4667-823b-01cdbf709161", "level": 10 } }, { "model": "users.right", - "pk": "f61b568e-91d3-4e62-ba79-b5d7b326bc7d", + "pk": "bb2da668-ad2c-4869-900a-fabc8edc1c81", "fields": { - "user": 3, + "user": 2, "group": null, - "content_type": 34, - "content_id": "8f459bd8-0440-444e-940b-3c14b10bdb0d", - "level": 50 + 
"content_type": 19, + "content_id": "35d72d0a-7d7a-4165-ad65-a863725a470f", + "level": 100 } }, { "model": "users.user", "pk": 1, "fields": { - "password": "pbkdf2_sha256$390000$n41pgYuMjEMc9yABOJDuv7$szqrPzEEMyrNyvxRF5ekrtgzLra4rnw09jkS/Rlb65M=", + "password": "pbkdf2_sha256$390000$QSd65IybrTHuN13us8Mqz3$5D0bK0ZjilS/hgJNYMtiBeGqIyPXT2pS1opEuL2Xw5A=", "last_login": null, "email": "root@root.fr", "display_name": "Admin", @@ -1832,7 +1832,7 @@ "model": "users.user", "pk": 2, "fields": { - "password": "pbkdf2_sha256$390000$82MV7wfiM1CYxnR3JI9dRw$vavB8EAQRE2Z0REJhhWU7bRyrQqZF5wazXjl1rOrvTY=", + "password": "pbkdf2_sha256$390000$TpW8NPlptDOAWz4DsGYUPB$CsXsGa2P8pwWcYLAFkF40X3F8wNyqMl+eDNB/OLs3gw=", "last_login": null, "email": "user@user.fr", "display_name": "Test user", @@ -1875,7 +1875,7 @@ }, { "model": "users.group", - "pk": "8f459bd8-0440-444e-940b-3c14b10bdb0d", + "pk": "d4c0ee9b-6c8e-4667-823b-01cdbf709161", "fields": { "name": "User group", "public": false, @@ -3871,42 +3871,124 @@ "codename": "view_datasetelement" } }, +{ + "model": "auth.permission", + "pk": 222, + "fields": { + "name": "Can add dataset set", + "content_type": 57, + "codename": "add_datasetset" + } +}, +{ + "model": "auth.permission", + "pk": 223, + "fields": { + "name": "Can change dataset set", + "content_type": 57, + "codename": "change_datasetset" + } +}, +{ + "model": "auth.permission", + "pk": 224, + "fields": { + "name": "Can delete dataset set", + "content_type": 57, + "codename": "delete_datasetset" + } +}, +{ + "model": "auth.permission", + "pk": 225, + "fields": { + "name": "Can view dataset set", + "content_type": 57, + "codename": "view_datasetset" + } +}, { "model": "ponos.farm", - "pk": "395522d1-94a0-407a-b5cb-347fa68ec2c4", + "pk": "0464874a-12cd-4bb5-a06f-37a872a82e5a", "fields": { "name": "Wheat farm", - "seed": "444809637bf8b3088a181de520a02151a99477edd051710e90a2d1fcdf860dbb" + "seed": "752dbcf9ba697d6affdd82339c81ca2db28f9b046961a317e0c509561bbc3ad7" } }, { "model": 
"training.dataset", - "pk": "4a844b36-08bd-4284-918e-5e03d543df13", + "pk": "2d515087-c048-42a1-a21a-5c1ba589f455", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", "creator": 2, "task": null, - "name": "Second Dataset", - "description": "dataset number two", - "state": "open", - "sets": "[\"training\", \"test\", \"validation\"]" + "name": "First Dataset", + "description": "dataset number one", + "state": "open" } }, { "model": "training.dataset", - "pk": "630d1cf5-3ba8-4335-b0be-d1be4cb18782", + "pk": "3a350d60-ce8d-4e26-a764-92b1ff02b3f8", "fields": { "created": "2020-02-02T01:23:45.678Z", "updated": "2020-02-02T01:23:45.678Z", - "corpus": "f34313ec-45ec-4ac4-92e5-43d7eb045c12", + "corpus": "35d72d0a-7d7a-4165-ad65-a863725a470f", "creator": 2, "task": null, - "name": "First Dataset", - "description": "dataset number one", - "state": "open", - "sets": "[\"training\", \"test\", \"validation\"]" + "name": "Second Dataset", + "description": "dataset number two", + "state": "open" + } +}, +{ + "model": "training.datasetset", + "pk": "04b3fdac-fd31-48b7-8d39-cfe68ebbe464", + "fields": { + "name": "training", + "dataset": "2d515087-c048-42a1-a21a-5c1ba589f455" + } +}, +{ + "model": "training.datasetset", + "pk": "35188db8-ac84-4edf-8f85-b747da5d20ab", + "fields": { + "name": "test", + "dataset": "2d515087-c048-42a1-a21a-5c1ba589f455" + } +}, +{ + "model": "training.datasetset", + "pk": "70e0b644-45cc-4fa7-8290-f9a56801d664", + "fields": { + "name": "validation", + "dataset": "2d515087-c048-42a1-a21a-5c1ba589f455" + } +}, +{ + "model": "training.datasetset", + "pk": "8a800950-7647-4001-8a19-fc2175061a24", + "fields": { + "name": "training", + "dataset": "3a350d60-ce8d-4e26-a764-92b1ff02b3f8" + } +}, +{ + "model": "training.datasetset", + "pk": "8c412165-7e72-4509-afc8-e4e5d13f428c", + "fields": { + "name": "validation", + 
"dataset": "3a350d60-ce8d-4e26-a764-92b1ff02b3f8" + } +}, +{ + "model": "training.datasetset", + "pk": "da8eb663-d2f0-489f-bcb5-6660386ba80b", + "fields": { + "name": "test", + "dataset": "3a350d60-ce8d-4e26-a764-92b1ff02b3f8" } } ] diff --git a/arkindex/documents/management/commands/build_fixtures.py b/arkindex/documents/management/commands/build_fixtures.py index b0e5d501035c7ff75b250fb07990546ccf0bab7e..e148226b0d09ab54cee68d4ea410ecbcd204c638 100644 --- a/arkindex/documents/management/commands/build_fixtures.py +++ b/arkindex/documents/management/commands/build_fixtures.py @@ -20,6 +20,7 @@ from arkindex.process.models import ( WorkerVersionState, ) from arkindex.project.tools import fake_now +from arkindex.training.models import DatasetSet from arkindex.users.models import Group, Right, Role, User @@ -271,8 +272,15 @@ class Command(BaseCommand): ) # Create 2 datasets - corpus.datasets.create(name="First Dataset", description="dataset number one", creator=user) - corpus.datasets.create(name="Second Dataset", description="dataset number two", creator=user) + dataset_1 = corpus.datasets.create(name="First Dataset", description="dataset number one", creator=user) + dataset_2 = corpus.datasets.create(name="Second Dataset", description="dataset number two", creator=user) + # Create their sets + DatasetSet.objects.bulk_create( + DatasetSet(name=name, dataset_id=dataset_1.id) for name in ["training", "validation", "test"] + ) + DatasetSet.objects.bulk_create( + DatasetSet(name=name, dataset_id=dataset_2.id) for name in ["training", "validation", "test"] + ) # Create 2 volumes vol1 = Element.objects.create( diff --git a/arkindex/documents/tasks.py b/arkindex/documents/tasks.py index acd269aede5489fcea12ae307f90fb61b7a452f4..ee4c680a850683f409fda99ca42e28d3d7e8efc7 100644 --- a/arkindex/documents/tasks.py +++ b/arkindex/documents/tasks.py @@ -24,7 +24,7 @@ from arkindex.documents.models import ( ) from arkindex.ponos.models import Task from arkindex.process.models 
import Process, ProcessDataset, ProcessElement, WorkerActivity, WorkerRun -from arkindex.training.models import DatasetElement +from arkindex.training.models import DatasetElement, DatasetSet from arkindex.users.models import User logger = logging.getLogger(__name__) @@ -73,7 +73,8 @@ def corpus_delete(corpus_id: str) -> None: # ProcessDataset M2M ProcessDataset.objects.filter(dataset__corpus_id=corpus_id), ProcessDataset.objects.filter(process__corpus_id=corpus_id), - DatasetElement.objects.filter(dataset__corpus_id=corpus_id), + DatasetElement.objects.filter(set__dataset__corpus_id=corpus_id), + DatasetSet.objects.filter(dataset__corpus_id=corpus_id), corpus.datasets.all(), # Delete the hidden M2M task parents table Task.parents.through.objects.filter(from_task__process__corpus_id=corpus_id), diff --git a/arkindex/documents/tests/tasks/test_corpus_delete.py b/arkindex/documents/tests/tasks/test_corpus_delete.py index 1685f99b416ec76434c7c25e85b5b2698f4c93ad..cea863ab3ce0effaa6e0250a92181e466774b196 100644 --- a/arkindex/documents/tests/tasks/test_corpus_delete.py +++ b/arkindex/documents/tests/tasks/test_corpus_delete.py @@ -5,7 +5,7 @@ from arkindex.documents.tasks import corpus_delete from arkindex.ponos.models import Farm, State, Task from arkindex.process.models import CorpusWorkerVersion, Process, ProcessDataset, ProcessMode, Repository, WorkerVersion from arkindex.project.tests import FixtureTestCase, force_constraints_immediate -from arkindex.training.models import Dataset +from arkindex.training.models import Dataset, DatasetSet class TestDeleteCorpus(FixtureTestCase): @@ -114,18 +114,25 @@ class TestDeleteCorpus(FixtureTestCase): cls.corpus2 = Corpus.objects.create(name="Other corpus") dataset1 = Dataset.objects.get(name="First Dataset") - dataset1.dataset_elements.create(element=element, set="test") + test_set_1 = dataset1.sets.get(name="test") + test_set_1.set_elements.create(element=element) cls.dataset2 = Dataset.objects.create(name="Dead Sea 
Scrolls", description="How to trigger a Third Impact", creator=cls.user, corpus=cls.corpus2) + DatasetSet.objects.bulk_create( + DatasetSet( + dataset=cls.dataset2, + name=set_name + ) for set_name in ["test", "training", "validation"] + ) # Process on cls.corpus and with a dataset from cls.corpus dataset_process1 = cls.corpus.processes.create(creator=cls.user, mode=ProcessMode.Dataset) - ProcessDataset.objects.create(process=dataset_process1, dataset=dataset1, sets=dataset1.sets) + ProcessDataset.objects.create(process=dataset_process1, dataset=dataset1, sets=list(dataset1.sets.values_list("name", flat=True))) # Process on cls.corpus with a dataset from another corpus dataset_process2 = cls.corpus.processes.create(creator=cls.user, mode=ProcessMode.Dataset) - ProcessDataset.objects.create(process=dataset_process2, dataset=dataset1, sets=dataset1.sets) - ProcessDataset.objects.create(process=dataset_process2, dataset=cls.dataset2, sets=cls.dataset2.sets) + ProcessDataset.objects.create(process=dataset_process2, dataset=dataset1, sets=list(dataset1.sets.values_list("name", flat=True))) + ProcessDataset.objects.create(process=dataset_process2, dataset=cls.dataset2, sets=list(cls.dataset2.sets.values_list("name", flat=True))) # Process on another corpus with a dataset from another corpus and none from cls.corpus cls.dataset_process3 = cls.corpus2.processes.create(creator=cls.user, mode=ProcessMode.Dataset) - ProcessDataset.objects.create(process=cls.dataset_process3, dataset=cls.dataset2, sets=cls.dataset2.sets) + ProcessDataset.objects.create(process=cls.dataset_process3, dataset=cls.dataset2, sets=list(cls.dataset2.sets.values_list("name", flat=True))) cls.rev = cls.repo.revisions.create( hash="42", diff --git a/arkindex/documents/tests/tasks/test_export.py b/arkindex/documents/tests/tasks/test_export.py index d04e63a14758d6421d0e1e08fe0a4b777fa489b6..4aae19ae02ea055861ac6831f41451d8b246f584 100644 --- a/arkindex/documents/tests/tasks/test_export.py +++ 
b/arkindex/documents/tests/tasks/test_export.py @@ -24,6 +24,7 @@ from arkindex.documents.models import ( from arkindex.images.models import Image, ImageServer from arkindex.process.models import Repository, WorkerType, WorkerVersion, WorkerVersionState from arkindex.project.tests import FixtureTestCase +from arkindex.training.models import DatasetElement TABLE_NAMES = { "export_version", @@ -131,8 +132,9 @@ class TestExport(FixtureTestCase): ) dataset = self.corpus.datasets.get(name="First Dataset") - dataset.dataset_elements.create(element=element, set="train") - dataset.dataset_elements.create(element=element, set="validation") + _, train_set, validation_set = dataset.sets.all().order_by("name") + train_set.set_elements.create(element=element) + validation_set.set_elements.create(element=element) export = self.corpus.exports.create(user=self.user) @@ -488,7 +490,7 @@ class TestExport(FixtureTestCase): ( str(dataset.id), dataset.name, - ",".join(dataset.sets), + ",".join(list(dataset.sets.values_list("name", flat=True))), ) for dataset in self.corpus.datasets.all() ] ) @@ -506,9 +508,9 @@ class TestExport(FixtureTestCase): ( str(dataset_element.id), str(dataset_element.element_id), - str(dataset_element.dataset_id), - dataset_element.set - ) for dataset_element in dataset.dataset_elements.all() + str(dataset_element.set.dataset_id), + dataset_element.set.name + ) for dataset_element in DatasetElement.objects.filter(set__dataset_id=dataset.id) ] ) diff --git a/arkindex/documents/tests/tasks/test_selection_worker_results_delete.py b/arkindex/documents/tests/tasks/test_selection_worker_results_delete.py index 1f25a1aa0716d5ed7d452606e325f312da4a3102..f766bc0b0220384bab1b7ffccfb10d0d876418b0 100644 --- a/arkindex/documents/tests/tasks/test_selection_worker_results_delete.py +++ b/arkindex/documents/tests/tasks/test_selection_worker_results_delete.py @@ -183,7 +183,8 @@ class TestDeleteSelectionWorkerResults(FixtureTestCase): job_mock.return_value.user_id = 
self.user.id self.page1.worker_version = self.version self.page1.save() - Dataset.objects.get(name="First Dataset").dataset_elements.create(element=self.page1, set="test") + dataset = Dataset.objects.get(name="First Dataset") + dataset.sets.get(name="test").set_elements.create(element=self.page1) self.user.selected_elements.set([self.page1]) selection_worker_results_delete(corpus_id=self.corpus.id, version_id=self.version.id) diff --git a/arkindex/documents/tests/tasks/test_worker_results_delete.py b/arkindex/documents/tests/tasks/test_worker_results_delete.py index 0fb898cf350c4bcf2eb3bb07ac6f960ee752c78f..52546e1bac401f5cd53028fb17a460e07c755b31 100644 --- a/arkindex/documents/tests/tasks/test_worker_results_delete.py +++ b/arkindex/documents/tests/tasks/test_worker_results_delete.py @@ -6,7 +6,7 @@ from arkindex.documents.models import Entity, EntityType, MLClass, Transcription from arkindex.documents.tasks import worker_results_delete from arkindex.process.models import ProcessMode, WorkerVersion from arkindex.project.tests import FixtureTestCase -from arkindex.training.models import Dataset, Model, ModelVersionState +from arkindex.training.models import DatasetSet, Model, ModelVersionState class TestDeleteWorkerResults(FixtureTestCase): @@ -270,7 +270,7 @@ class TestDeleteWorkerResults(FixtureTestCase): self.page1.worker_run = self.worker_run_1 self.page1.worker_version = self.version_1 self.page1.save() - Dataset.objects.get(name="First Dataset").dataset_elements.create(element=self.page1, set="test") + DatasetSet.objects.get(name="test", dataset__name="First Dataset").set_elements.create(element=self.page1) worker_results_delete(corpus_id=self.corpus.id) # Prevent delaying constraints check at end of the test transaction diff --git a/arkindex/documents/tests/test_destroy_elements.py b/arkindex/documents/tests/test_destroy_elements.py index da6312ac19d0fcd0863ce87752baa5bd7dcf95ef..af9fe5fb44f798e919d50a245102ab6a1c68a0f9 100644 --- 
a/arkindex/documents/tests/test_destroy_elements.py +++ b/arkindex/documents/tests/test_destroy_elements.py @@ -148,7 +148,8 @@ class TestDestroyElements(FixtureAPITestCase): """ An element cannot be deleted via the API if linked to a dataset """ - Dataset.objects.get(name="First Dataset").dataset_elements.create(element=self.vol, set="test") + dataset = Dataset.objects.get(name="First Dataset") + dataset.sets.get(name="test").set_elements.create(element=self.vol) self.client.force_login(self.user) with self.assertNumQueries(3): response = self.client.delete(reverse("api:element-retrieve", kwargs={"pk": str(self.vol.id)})) @@ -179,9 +180,9 @@ class TestDestroyElements(FixtureAPITestCase): """ Elements that are part of a dataset cannot be deleted """ - Dataset.objects.get(name="First Dataset").dataset_elements.create( - element=Element.objects.get_descending(self.vol.id).first(), - set="test", + dataset = Dataset.objects.get(name="First Dataset") + dataset.sets.get(name="test").set_elements.create( + element=Element.objects.get_descending(self.vol.id).first() ) Element.objects.filter(id=self.vol.id).trash() diff --git a/arkindex/process/api.py b/arkindex/process/api.py index e6f76cf64579eddb4aa3029edda8a3b7a28a478f..f3eb182c9454e29f7f18afdc4205d0903875c7db 100644 --- a/arkindex/process/api.py +++ b/arkindex/process/api.py @@ -706,6 +706,7 @@ class ProcessDatasets(ProcessACLMixin, ListAPIView): return ( ProcessDataset.objects.filter(process_id=self.process.id) .select_related("process__creator", "dataset__creator") + .prefetch_related("dataset__sets") .order_by("dataset__name") ) @@ -715,8 +716,6 @@ class ProcessDatasets(ProcessACLMixin, ListAPIView): if not self.kwargs: return context context["process"] = self.process - # Disable set elements counts in serialized dataset - context["sets_count"] = False return context @@ -751,6 +750,7 @@ class ProcessDatasetManage(CreateAPIView, UpdateAPIView, DestroyAPIView): process_dataset = get_object_or_404( 
ProcessDataset.objects .select_related("dataset__creator", "process__corpus") + .prefetch_related("dataset__sets") # Required to check for a process that have already started .annotate(process_has_tasks=Exists(Task.objects.filter(process_id=self.kwargs["process"]))), dataset_id=self.kwargs["dataset"], process_id=self.kwargs["process"] @@ -759,12 +759,6 @@ class ProcessDatasetManage(CreateAPIView, UpdateAPIView, DestroyAPIView): process_dataset.process.has_tasks = process_dataset.process_has_tasks return process_dataset - def get_serializer_context(self): - context = super().get_serializer_context() - # Disable set elements counts in serialized dataset - context["sets_count"] = False - return context - def destroy(self, request, *args, **kwargs): serializer = self.get_serializer(data=request.data) serializer.is_valid(raise_exception=True) diff --git a/arkindex/process/migrations/0029_processdataset_sets.py b/arkindex/process/migrations/0029_processdataset_sets.py index 868c1cc29fe5814d20fa4d4988ec5b55b89f486a..f1b1cda5097ebf68abaf276d22464f1ab5b3147d 100644 --- a/arkindex/process/migrations/0029_processdataset_sets.py +++ b/arkindex/process/migrations/0029_processdataset_sets.py @@ -1,8 +1,8 @@ import django.core.validators from django.db import migrations, models +import arkindex.process.models import arkindex.project.fields -import arkindex.training.models class Migration(migrations.Migration): @@ -37,7 +37,7 @@ class Migration(migrations.Migration): validators=[django.core.validators.MinLengthValidator(1)] ), size=None, - validators=[django.core.validators.MinLengthValidator(1), arkindex.training.models.validate_unique_set_names] + validators=[django.core.validators.MinLengthValidator(1), arkindex.process.models.validate_unique_set_names] ), ), ] diff --git a/arkindex/process/models.py b/arkindex/process/models.py index 75774d70481aec6286006d53e6d5fafe07d96662..e32dd410fe26622c3d7ea6506dac24cea163457b 100644 --- a/arkindex/process/models.py +++ 
b/arkindex/process/models.py @@ -5,6 +5,7 @@ from typing import Optional from django.conf import settings from django.contrib.contenttypes.fields import GenericRelation +from django.core.exceptions import ValidationError from django.core.validators import MinLengthValidator, MinValueValidator from django.db import models, transaction from django.db.models import F, Q @@ -27,8 +28,9 @@ from arkindex.process.managers import ( from arkindex.project.aws import S3FileMixin, S3FileStatus from arkindex.project.fields import ArrayField, MD5HashField from arkindex.project.models import IndexableModel +from arkindex.project.tools import is_prefetched from arkindex.project.validators import MaxValueValidator -from arkindex.training.models import ModelVersion, ModelVersionState, validate_unique_set_names +from arkindex.training.models import ModelVersion, ModelVersionState from arkindex.users.models import Role @@ -40,6 +42,11 @@ def process_max_chunks(): return settings.MAX_CHUNKS +def validate_unique_set_names(sets): + if len(set(sets)) != len(sets): + raise ValidationError("Set names must be unique.") + + class ActivityState(Enum): """ Store the state of the workers activity tracking for a process. 
@@ -218,11 +225,7 @@ class Process(IndexableModel): See https://stackoverflow.com/a/19651840/5990435 """ - return ( - hasattr(self, "_prefetched_objects_cache") - and self.tasks.field.remote_field.get_cache_name() - in self._prefetched_objects_cache - ) + return is_prefetched(self.tasks) @property def expiry(self): diff --git a/arkindex/process/serializers/training.py b/arkindex/process/serializers/training.py index e7c25f1ec73aa303c934ed75e7950d701df78c60..f280a16a650566dfcaab024d23d1d1c3d52faa8a 100644 --- a/arkindex/process/serializers/training.py +++ b/arkindex/process/serializers/training.py @@ -89,7 +89,7 @@ class ProcessDatasetSerializer(ProcessACLMixin, serializers.ModelSerializer): else: dataset_qs = Dataset.objects.filter(corpus__in=Corpus.objects.readable(self._user)) try: - dataset = dataset_qs.select_related("creator").get(pk=data["dataset_id"]) + dataset = dataset_qs.select_related("creator").prefetch_related("sets").get(pk=data["dataset_id"]) except Dataset.DoesNotExist: raise ValidationError({"dataset": [f'Invalid pk "{str(data["dataset_id"])}" - object does not exist.']}) else: @@ -109,11 +109,11 @@ class ProcessDatasetSerializer(ProcessACLMixin, serializers.ModelSerializer): sets = data.get("sets") if not sets or len(sets) == 0: if not self.instance: - data["sets"] = dataset.sets + data["sets"] = [item.name for item in list(dataset.sets.all())] else: errors["sets"].append("This field cannot be empty.") else: - if any(s not in dataset.sets for s in sets): + if any(s not in [item.name for item in list(dataset.sets.all())] for s in sets): errors["sets"].append("The specified sets must all exist in the specified dataset.") if len(set(sets)) != len(sets): errors["sets"].append("Sets must be unique.") diff --git a/arkindex/process/tests/test_create_process.py b/arkindex/process/tests/test_create_process.py index 34264e1a045182bcde56ab168bd02f77605e68b5..59e454700caae9873e09d5c55644cb57e220f616 100644 --- a/arkindex/process/tests/test_create_process.py 
+++ b/arkindex/process/tests/test_create_process.py @@ -899,7 +899,8 @@ class TestCreateProcess(FixtureAPITestCase): self.client.force_login(self.user) process = self.corpus.processes.create(creator=self.user, mode=ProcessMode.Dataset) dataset = self.corpus.datasets.first() - ProcessDataset.objects.create(process=process, dataset=dataset, sets=dataset.sets) + test_sets = list(dataset.sets.values_list("name", flat=True)) + ProcessDataset.objects.create(process=process, dataset=dataset, sets=test_sets) process.versions.set([self.version_2, self.version_3]) with self.assertNumQueries(9): @@ -929,7 +930,8 @@ class TestCreateProcess(FixtureAPITestCase): self.worker_1.save() process = self.corpus.processes.create(creator=self.user, mode=ProcessMode.Dataset) dataset = self.corpus.datasets.first() - ProcessDataset.objects.create(process=process, dataset=dataset, sets=dataset.sets) + test_sets = list(dataset.sets.values_list("name", flat=True)) + ProcessDataset.objects.create(process=process, dataset=dataset, sets=test_sets) process.versions.add(self.version_1) with self.assertNumQueries(9): diff --git a/arkindex/process/tests/test_process_datasets.py b/arkindex/process/tests/test_process_datasets.py index 5f35cff0b91bcd2f438f7004b13e6f0d22e6e42c..550c2968dae2480d62631aa0bf7e8990d582a30b 100644 --- a/arkindex/process/tests/test_process_datasets.py +++ b/arkindex/process/tests/test_process_datasets.py @@ -9,7 +9,7 @@ from arkindex.documents.models import Corpus from arkindex.ponos.models import Farm from arkindex.process.models import Process, ProcessDataset, ProcessMode from arkindex.project.tests import FixtureAPITestCase -from arkindex.training.models import Dataset +from arkindex.training.models import Dataset, DatasetSet from arkindex.users.models import Role, User # Using the fake DB fixtures creation date when needed @@ -28,6 +28,10 @@ class TestProcessDatasets(FixtureAPITestCase): description="Human instrumentality manual", creator=cls.user ) + 
DatasetSet.objects.bulk_create([ + DatasetSet(dataset_id=cls.private_dataset.id, name=set_name) + for set_name in ["validation", "training", "test"] + ]) cls.test_user = User.objects.create(email="katsuragi@nerv.co.jp", verified_email=True) cls.private_corpus.memberships.create(user=cls.test_user, level=Role.Admin.value) @@ -40,8 +44,8 @@ class TestProcessDatasets(FixtureAPITestCase): corpus_id=cls.private_corpus.id, farm=Farm.objects.get(name="Wheat farm") ) - cls.process_dataset_1 = ProcessDataset.objects.create(process=cls.dataset_process, dataset=cls.dataset1, sets=cls.dataset1.sets) - cls.process_dataset_2 = ProcessDataset.objects.create(process=cls.dataset_process, dataset=cls.private_dataset, sets=cls.private_dataset.sets) + cls.process_dataset_1 = ProcessDataset.objects.create(process=cls.dataset_process, dataset=cls.dataset1, sets=list(cls.dataset1.sets.values_list("name", flat=True))) + cls.process_dataset_2 = ProcessDataset.objects.create(process=cls.dataset_process, dataset=cls.private_dataset, sets=list(cls.private_dataset.sets.values_list("name", flat=True))) # Control process to check that its datasets are not retrieved cls.dataset_process_2 = Process.objects.create( @@ -49,7 +53,7 @@ class TestProcessDatasets(FixtureAPITestCase): mode=ProcessMode.Dataset, corpus_id=cls.corpus.id ) - ProcessDataset.objects.create(process=cls.dataset_process_2, dataset=cls.dataset2, sets=cls.dataset2.sets) + ProcessDataset.objects.create(process=cls.dataset_process_2, dataset=cls.dataset2, sets=list(cls.dataset2.sets.values_list("name", flat=True))) # List process datasets @@ -78,69 +82,84 @@ class TestProcessDatasets(FixtureAPITestCase): def test_list(self): self.client.force_login(self.test_user) - with self.assertNumQueries(5): + with self.assertNumQueries(6): response = self.client.get(reverse("api:process-datasets", kwargs={"pk": self.dataset_process.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response.json()["results"], [ 
- { - "id": str(self.process_dataset_2.id), - "dataset": { - "id": str(self.private_dataset.id), - "name": "Dead sea scrolls", - "description": "Human instrumentality manual", - "creator": "Test user", - "sets": ["training", "test", "validation"], - "set_elements": None, - "corpus_id": str(self.private_corpus.id), - "state": "open", - "task_id": None, - "created": FAKE_CREATED, - "updated": FAKE_CREATED - }, - "sets": ["training", "test", "validation"] - }, - { - "id": str(self.process_dataset_1.id), - "dataset": { - "id": str(self.dataset1.id), - "name": "First Dataset", - "description": "dataset number one", - "creator": "Test user", - "sets": ["training", "test", "validation"], - "set_elements": None, - "corpus_id": str(self.corpus.id), - "state": "open", - "task_id": None, - "created": FAKE_CREATED, - "updated": FAKE_CREATED - }, - "sets": ["training", "test", "validation"] + sets_0 = response.json()["results"][0].pop("sets") + self.assertCountEqual(sets_0, ["validation", "training", "test"]) + self.assertDictEqual(response.json()["results"][0], { + "id": str(self.process_dataset_2.id), + "dataset": { + "id": str(self.private_dataset.id), + "name": "Dead sea scrolls", + "description": "Human instrumentality manual", + "creator": "Test user", + "sets": [ + { + "id": str(ds.id), + "name": ds.name + } + for ds in self.private_dataset.sets.all() + ], + "set_elements": None, + "corpus_id": str(self.private_corpus.id), + "state": "open", + "task_id": None, + "created": FAKE_CREATED, + "updated": FAKE_CREATED } - ]) + }) + sets_1 = response.json()["results"][1].pop("sets") + self.assertCountEqual(sets_1, ["validation", "training", "test"]) + self.assertDictEqual(response.json()["results"][1], { + "id": str(self.process_dataset_1.id), + "dataset": { + "id": str(self.dataset1.id), + "name": "First Dataset", + "description": "dataset number one", + "creator": "Test user", + "sets": [ + { + "id": str(ds.id), + "name": ds.name + } + for ds in self.dataset1.sets.all() + ], 
+ "set_elements": None, + "corpus_id": str(self.corpus.id), + "state": "open", + "task_id": None, + "created": FAKE_CREATED, + "updated": FAKE_CREATED + } + }) # Create process dataset def test_create_requires_login(self): + test_sets = list(self.dataset2.sets.values_list("name", flat=True)) with self.assertNumQueries(0): response = self.client.post( reverse("api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": self.dataset2.id}), - data={"sets": self.dataset2.sets} + data={"sets": test_sets} ) self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) def test_create_requires_verified(self): unverified_user = User.objects.create(email="email@mail.com") + test_sets = list(self.dataset2.sets.values_list("name", flat=True)) self.client.force_login(unverified_user) with self.assertNumQueries(2): response = self.client.post( reverse("api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": self.dataset2.id}), - data={"sets": self.dataset2.sets} + data={"sets": test_sets} ) self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) @patch("arkindex.project.mixins.get_max_level") def test_create_access_level(self, get_max_level_mock): cases = [None, Role.Guest.value, Role.Contributor.value] + test_sets = list(self.dataset2.sets.values_list("name", flat=True)) for level in cases: with self.subTest(level=level): get_max_level_mock.reset_mock() @@ -150,7 +169,7 @@ class TestProcessDatasets(FixtureAPITestCase): with self.assertNumQueries(3): response = self.client.post( reverse("api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": self.dataset2.id}), - data={"sets": self.dataset2.sets} + data={"sets": test_sets} ) self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) @@ -161,16 +180,17 @@ class TestProcessDatasets(FixtureAPITestCase): def test_create_process_mode(self): cases = set(ProcessMode) - {ProcessMode.Dataset, ProcessMode.Local} + test_sets = 
list(self.dataset2.sets.values_list("name", flat=True)) for mode in cases: with self.subTest(mode=mode): self.dataset_process.mode = mode self.dataset_process.save() self.client.force_login(self.test_user) - with self.assertNumQueries(5): + with self.assertNumQueries(6): response = self.client.post( reverse("api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": self.dataset2.id}), - data={"sets": self.dataset2.sets} + data={"sets": test_sets} ) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) @@ -178,22 +198,24 @@ class TestProcessDatasets(FixtureAPITestCase): def test_create_process_mode_local(self): self.client.force_login(self.user) + test_sets = list(self.dataset2.sets.values_list("name", flat=True)) local_process = Process.objects.get(creator=self.user, mode=ProcessMode.Local) with self.assertNumQueries(3): response = self.client.post( reverse("api:process-dataset", kwargs={"process": local_process.id, "dataset": self.dataset2.id}), - data={"sets": self.dataset2.sets} + data={"sets": test_sets} ) self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) self.assertEqual(response.json(), {"detail": "You do not have admin access to this process."}) def test_create_wrong_process_uuid(self): self.client.force_login(self.test_user) + test_sets = list(self.dataset2.sets.values_list("name", flat=True)) wrong_id = uuid.uuid4() with self.assertNumQueries(3): response = self.client.post( reverse("api:process-dataset", kwargs={"process": wrong_id, "dataset": self.dataset2.id}), - data={"sets": self.dataset2.sets} + data={"sets": test_sets} ) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) self.assertEqual(response.json(), {"process": [f'Invalid pk "{str(wrong_id)}" - object does not exist.']}) @@ -213,12 +235,13 @@ class TestProcessDatasets(FixtureAPITestCase): def test_create_dataset_access(self, filter_rights_mock): new_corpus = Corpus.objects.create(name="NERV") new_dataset = 
new_corpus.datasets.create(name="Eva series", description="We created the Evas from Adam", creator=self.user) + test_sets = list(new_dataset.sets.values_list("name", flat=True)) self.client.force_login(self.test_user) with self.assertNumQueries(3): response = self.client.post( reverse("api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": new_dataset.id}), - data={"sets": new_dataset.sets} + data={"sets": test_sets} ) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) @@ -229,12 +252,13 @@ class TestProcessDatasets(FixtureAPITestCase): def test_create_unique(self): self.client.force_login(self.test_user) + test_sets = list(self.dataset1.sets.values_list("name", flat=True)) self.assertTrue(self.dataset_process.datasets.filter(id=self.dataset1.id).exists()) - with self.assertNumQueries(5): + with self.assertNumQueries(6): response = self.client.post( reverse("api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": self.dataset1.id}), - data={"sets": self.dataset1.sets} + data={"sets": test_sets} ) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) @@ -243,11 +267,12 @@ class TestProcessDatasets(FixtureAPITestCase): def test_create_started(self): self.client.force_login(self.test_user) self.dataset_process.tasks.create(run=0, depth=0, slug="makrout") + test_sets = list(self.dataset2.sets.values_list("name", flat=True)) - with self.assertNumQueries(5): + with self.assertNumQueries(6): response = self.client.post( reverse("api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": self.dataset2.id}), - data={"sets": self.dataset2.sets} + data={"sets": test_sets} ) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) @@ -257,7 +282,7 @@ class TestProcessDatasets(FixtureAPITestCase): self.client.force_login(self.test_user) self.assertEqual(ProcessDataset.objects.count(), 3) self.assertFalse(ProcessDataset.objects.filter(process=self.dataset_process.id, 
dataset=self.dataset2.id).exists()) - with self.assertNumQueries(6): + with self.assertNumQueries(7): response = self.client.post( reverse( "api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": self.dataset2.id} @@ -272,6 +297,8 @@ class TestProcessDatasets(FixtureAPITestCase): self.dataset2 ]) created = ProcessDataset.objects.get(process=self.dataset_process.id, dataset=self.dataset2.id) + process_sets = response.json().pop("sets") + self.assertCountEqual(process_sets, ["validation", "training", "test"]) self.assertDictEqual(response.json(), { "id": str(created.id), "dataset": { @@ -279,22 +306,27 @@ class TestProcessDatasets(FixtureAPITestCase): "name": "Second Dataset", "description": "dataset number two", "creator": "Test user", - "sets": ["training", "test", "validation"], + "sets": [ + { + "id": str(ds.id), + "name": ds.name + } + for ds in self.dataset2.sets.all() + ], "set_elements": None, "corpus_id": str(self.corpus.id), "state": "open", "task_id": None, "created": FAKE_CREATED, "updated": FAKE_CREATED - }, - "sets": ["training", "test", "validation"] + } }) def test_create(self): self.client.force_login(self.test_user) self.assertEqual(ProcessDataset.objects.count(), 3) self.assertFalse(ProcessDataset.objects.filter(process=self.dataset_process.id, dataset=self.dataset2.id).exists()) - with self.assertNumQueries(6): + with self.assertNumQueries(7): response = self.client.post( reverse("api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": self.dataset2.id}), data={"sets": ["validation", "test"]} @@ -315,7 +347,13 @@ class TestProcessDatasets(FixtureAPITestCase): "name": "Second Dataset", "description": "dataset number two", "creator": "Test user", - "sets": ["training", "test", "validation"], + "sets": [ + { + "id": str(ds.id), + "name": ds.name + } + for ds in self.dataset2.sets.all() + ], "set_elements": None, "corpus_id": str(self.corpus.id), "state": "open", @@ -330,7 +368,7 @@ class 
TestProcessDatasets(FixtureAPITestCase): self.client.force_login(self.test_user) self.assertEqual(ProcessDataset.objects.count(), 3) self.assertFalse(ProcessDataset.objects.filter(process=self.dataset_process.id, dataset=self.dataset2.id).exists()) - with self.assertNumQueries(5): + with self.assertNumQueries(6): response = self.client.post( reverse("api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": self.dataset2.id}), data={"sets": ["Unit-01"]} @@ -367,7 +405,7 @@ class TestProcessDatasets(FixtureAPITestCase): if level: self.private_corpus.memberships.create(user=self.test_user, level=level.value) self.client.force_login(self.test_user) - with self.assertNumQueries(4): + with self.assertNumQueries(5): response = self.client.put( reverse("api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": self.dataset1.id}), data={"sets": ["test"]} @@ -403,7 +441,7 @@ class TestProcessDatasets(FixtureAPITestCase): def test_update(self): self.client.force_login(self.test_user) - with self.assertNumQueries(4): + with self.assertNumQueries(5): response = self.client.put( reverse("api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": self.dataset1.id}), data={"sets": ["test"]} @@ -416,7 +454,13 @@ class TestProcessDatasets(FixtureAPITestCase): "name": "First Dataset", "description": "dataset number one", "creator": "Test user", - "sets": ["training", "test", "validation"], + "sets": [ + { + "id": str(ds.id), + "name": ds.name + } + for ds in self.dataset1.sets.all() + ], "set_elements": None, "corpus_id": str(self.corpus.id), "state": "open", @@ -429,7 +473,7 @@ class TestProcessDatasets(FixtureAPITestCase): def test_update_wrong_sets(self): self.client.force_login(self.test_user) - with self.assertNumQueries(3): + with self.assertNumQueries(4): response = self.client.put( reverse("api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": self.dataset1.id}), data={"sets": ["Unit-01", 
"Unit-02"]} @@ -439,7 +483,7 @@ class TestProcessDatasets(FixtureAPITestCase): def test_update_unique_sets(self): self.client.force_login(self.test_user) - with self.assertNumQueries(3): + with self.assertNumQueries(4): response = self.client.put( reverse("api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": self.dataset1.id}), data={"sets": ["test", "test"]} @@ -455,7 +499,7 @@ class TestProcessDatasets(FixtureAPITestCase): expiry=datetime(1970, 1, 1, tzinfo=timezone.utc), ) self.client.force_login(self.test_user) - with self.assertNumQueries(3): + with self.assertNumQueries(4): response = self.client.put( reverse("api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": self.dataset1.id}), data={"sets": ["test"]} @@ -468,7 +512,7 @@ class TestProcessDatasets(FixtureAPITestCase): Non "sets" fields in the update request are ignored """ self.client.force_login(self.test_user) - with self.assertNumQueries(4): + with self.assertNumQueries(5): response = self.client.put( reverse("api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": self.dataset1.id}), data={"process": str(self.dataset_process_2.id), "dataset": str(self.dataset2.id), "sets": ["test"]} @@ -481,7 +525,13 @@ class TestProcessDatasets(FixtureAPITestCase): "name": "First Dataset", "description": "dataset number one", "creator": "Test user", - "sets": ["training", "test", "validation"], + "sets": [ + { + "id": str(ds.id), + "name": ds.name + } + for ds in self.dataset1.sets.all() + ], "set_elements": None, "corpus_id": str(self.corpus.id), "state": "open", @@ -520,7 +570,7 @@ class TestProcessDatasets(FixtureAPITestCase): if level: self.private_corpus.memberships.create(user=self.test_user, level=level.value) self.client.force_login(self.test_user) - with self.assertNumQueries(4): + with self.assertNumQueries(5): response = self.client.patch( reverse("api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": 
self.dataset1.id}), data={"sets": ["test"]} @@ -556,7 +606,7 @@ class TestProcessDatasets(FixtureAPITestCase): def test_partial_update(self): self.client.force_login(self.test_user) - with self.assertNumQueries(4): + with self.assertNumQueries(5): response = self.client.patch( reverse("api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": self.dataset1.id}), data={"sets": ["test"]} @@ -569,7 +619,13 @@ class TestProcessDatasets(FixtureAPITestCase): "name": "First Dataset", "description": "dataset number one", "creator": "Test user", - "sets": ["training", "test", "validation"], + "sets": [ + { + "id": str(ds.id), + "name": ds.name + } + for ds in self.dataset1.sets.all() + ], "set_elements": None, "corpus_id": str(self.corpus.id), "state": "open", @@ -582,7 +638,7 @@ class TestProcessDatasets(FixtureAPITestCase): def test_partial_update_wrong_sets(self): self.client.force_login(self.test_user) - with self.assertNumQueries(3): + with self.assertNumQueries(4): response = self.client.patch( reverse("api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": self.dataset1.id}), data={"sets": ["Unit-01", "Unit-02"]} @@ -592,7 +648,7 @@ class TestProcessDatasets(FixtureAPITestCase): def test_partial_update_unique_sets(self): self.client.force_login(self.test_user) - with self.assertNumQueries(3): + with self.assertNumQueries(4): response = self.client.patch( reverse("api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": self.dataset1.id}), data={"sets": ["test", "test"]} @@ -608,7 +664,7 @@ class TestProcessDatasets(FixtureAPITestCase): expiry=datetime(1970, 1, 1, tzinfo=timezone.utc), ) self.client.force_login(self.test_user) - with self.assertNumQueries(3): + with self.assertNumQueries(4): response = self.client.patch( reverse("api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": self.dataset1.id}), data={"sets": ["test"]} @@ -621,7 +677,7 @@ class 
TestProcessDatasets(FixtureAPITestCase): Non "sets" fields in the partial update request are ignored """ self.client.force_login(self.test_user) - with self.assertNumQueries(4): + with self.assertNumQueries(5): response = self.client.patch( reverse("api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": self.dataset1.id}), data={"process": str(self.dataset_process_2.id), "dataset": str(self.dataset2.id), "sets": ["test"]} @@ -634,7 +690,13 @@ class TestProcessDatasets(FixtureAPITestCase): "name": "First Dataset", "description": "dataset number one", "creator": "Test user", - "sets": ["training", "test", "validation"], + "sets": [ + { + "id": str(ds.id), + "name": ds.name + } + for ds in self.dataset1.sets.all() + ], "set_elements": None, "corpus_id": str(self.corpus.id), "state": "open", @@ -677,7 +739,7 @@ class TestProcessDatasets(FixtureAPITestCase): def test_destroy_not_found(self): self.assertFalse(self.dataset_process.datasets.filter(id=self.dataset2.id).exists()) self.client.force_login(self.test_user) - with self.assertNumQueries(5): + with self.assertNumQueries(6): response = self.client.delete( reverse("api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": self.dataset2.id}), ) @@ -700,10 +762,11 @@ class TestProcessDatasets(FixtureAPITestCase): def test_destroy_no_dataset_access_requirement(self): new_corpus = Corpus.objects.create(name="NERV") new_dataset = new_corpus.datasets.create(name="Eva series", description="We created the Evas from Adam", creator=self.user) - ProcessDataset.objects.create(process=self.dataset_process, dataset=new_dataset, sets=new_dataset.sets) + test_sets = list(new_dataset.sets.values_list("name", flat=True)) + ProcessDataset.objects.create(process=self.dataset_process, dataset=new_dataset, sets=test_sets) self.assertTrue(ProcessDataset.objects.filter(process=self.dataset_process, dataset=new_dataset).exists()) self.client.force_login(self.test_user) - with self.assertNumQueries(6): 
+ with self.assertNumQueries(7): response = self.client.delete( reverse("api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": new_dataset.id}), ) @@ -718,7 +781,7 @@ class TestProcessDatasets(FixtureAPITestCase): self.dataset_process.save() self.client.force_login(self.test_user) - with self.assertNumQueries(4): + with self.assertNumQueries(5): response = self.client.delete( reverse("api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": self.dataset2.id}), ) @@ -740,7 +803,7 @@ class TestProcessDatasets(FixtureAPITestCase): self.client.force_login(self.test_user) self.dataset_process.tasks.create(run=0, depth=0, slug="makrout") - with self.assertNumQueries(4): + with self.assertNumQueries(5): response = self.client.delete( reverse("api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": self.dataset1.id}), ) @@ -750,7 +813,7 @@ class TestProcessDatasets(FixtureAPITestCase): def test_destroy(self): self.client.force_login(self.test_user) - with self.assertNumQueries(6): + with self.assertNumQueries(7): response = self.client.delete( reverse("api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": self.dataset1.id}), ) @@ -765,7 +828,7 @@ class TestProcessDatasets(FixtureAPITestCase): self.process_dataset_1.sets = ["test"] self.process_dataset_1.save() self.client.force_login(self.test_user) - with self.assertNumQueries(6): + with self.assertNumQueries(7): response = self.client.delete( reverse("api:process-dataset", kwargs={"process": self.dataset_process.id, "dataset": self.dataset1.id}), ) diff --git a/arkindex/process/tests/test_processes.py b/arkindex/process/tests/test_processes.py index 8a4d715cd28551209e30806052cd92607a8243ce..b897d3250d189a0e50e3a99634d317c3522bcc3d 100644 --- a/arkindex/process/tests/test_processes.py +++ b/arkindex/process/tests/test_processes.py @@ -2324,7 +2324,7 @@ class TestProcesses(FixtureAPITestCase): def 
test_start_process_dataset_requires_dataset_in_same_corpus(self): process2 = self.corpus.processes.create(creator=self.user, mode=ProcessMode.Dataset) - ProcessDataset.objects.create(process=process2, dataset=self.private_dataset, sets=self.private_dataset.sets) + ProcessDataset.objects.create(process=process2, dataset=self.private_dataset, sets=list(self.private_dataset.sets.values_list("name", flat=True))) process2.worker_runs.create(version=self.recognizer, parents=[], configuration=None) self.assertFalse(process2.tasks.exists()) @@ -2341,8 +2341,8 @@ class TestProcesses(FixtureAPITestCase): def test_start_process_dataset_unsupported_parameters(self): process2 = self.corpus.processes.create(creator=self.user, mode=ProcessMode.Dataset) - ProcessDataset.objects.create(process=process2, dataset=self.dataset1, sets=self.dataset1.sets) - ProcessDataset.objects.create(process=process2, dataset=self.private_dataset, sets=self.dataset2.sets) + ProcessDataset.objects.create(process=process2, dataset=self.dataset1, sets=list(self.dataset1.sets.values_list("name", flat=True))) + ProcessDataset.objects.create(process=process2, dataset=self.private_dataset, sets=list(self.dataset2.sets.values_list("name", flat=True))) process2.worker_runs.create(version=self.recognizer, parents=[], configuration=None) self.client.force_login(self.user) @@ -2366,8 +2366,8 @@ class TestProcesses(FixtureAPITestCase): def test_start_process_dataset(self): process2 = self.corpus.processes.create(creator=self.user, mode=ProcessMode.Dataset) - ProcessDataset.objects.create(process=process2, dataset=self.dataset1, sets=self.dataset1.sets) - ProcessDataset.objects.create(process=process2, dataset=self.private_dataset, sets=self.private_dataset.sets) + ProcessDataset.objects.create(process=process2, dataset=self.dataset1, sets=list(self.dataset1.sets.values_list("name", flat=True))) + ProcessDataset.objects.create(process=process2, dataset=self.private_dataset, 
sets=list(self.private_dataset.sets.values_list("name", flat=True))) run = process2.worker_runs.create(version=self.recognizer, parents=[], configuration=None) self.assertFalse(process2.tasks.exists()) @@ -2562,8 +2562,8 @@ class TestProcesses(FixtureAPITestCase): It should be possible to pass chunks when starting a dataset process """ process = self.corpus.processes.create(creator=self.user, mode=ProcessMode.Dataset) - ProcessDataset.objects.create(process=process, dataset=self.dataset1, sets=self.dataset1.sets) - ProcessDataset.objects.create(process=process, dataset=self.dataset2, sets=self.dataset2.sets) + ProcessDataset.objects.create(process=process, dataset=self.dataset1, sets=list(self.dataset1.sets.values_list("name", flat=True))) + ProcessDataset.objects.create(process=process, dataset=self.dataset2, sets=list(self.dataset2.sets.values_list("name", flat=True))) # Add a worker run to this process run = process.worker_runs.create(version=self.recognizer, parents=[], configuration=None) diff --git a/arkindex/project/api_v1.py b/arkindex/project/api_v1.py index 17c92d941b684639b7d6219340ce33da615ba0ed..3c893fc12fc95707cd24b4363ff115a9f67bfbfe 100644 --- a/arkindex/project/api_v1.py +++ b/arkindex/project/api_v1.py @@ -109,7 +109,7 @@ from arkindex.training.api import ( DatasetElementDestroy, DatasetElements, DatasetUpdate, - ElementDatasets, + ElementDatasetSets, MetricValueBulkCreate, MetricValueCreate, ModelCompatibleWorkerManage, @@ -184,7 +184,7 @@ api = [ # Datasets path("corpus/<uuid:pk>/datasets/", CorpusDataset.as_view(), name="corpus-datasets"), path("corpus/<uuid:pk>/datasets/selection/", CreateDatasetElementsSelection.as_view(), name="dataset-elements-selection"), - path("element/<uuid:pk>/datasets/", ElementDatasets.as_view(), name="element-datasets"), + path("element/<uuid:pk>/datasets/", ElementDatasetSets.as_view(), name="element-datasets"), path("datasets/<uuid:pk>/", DatasetUpdate.as_view(), name="dataset-update"), 
path("datasets/<uuid:pk>/clone/", DatasetClone.as_view(), name="dataset-clone"), path("datasets/<uuid:pk>/elements/", DatasetElements.as_view(), name="dataset-elements"), diff --git a/arkindex/project/serializer_fields.py b/arkindex/project/serializer_fields.py index cefaaf14ec832e4ff3014a6dd44a701dc5e604d3..230bafd37351cccedda4d2f0c8298f0b3cc1f916 100644 --- a/arkindex/project/serializer_fields.py +++ b/arkindex/project/serializer_fields.py @@ -4,7 +4,6 @@ from urllib.parse import quote, unquote import bleach from django.contrib.gis.geos import Point -from django.db.models import Count from drf_spectacular.utils import extend_schema_field from rest_framework import serializers @@ -12,6 +11,7 @@ from arkindex.documents.models import MetaType from arkindex.ponos.utils import get_process_from_task_auth from arkindex.process.models import ProcessMode, WorkerRun from arkindex.project.gis import ensure_linear_ring +from arkindex.project.tools import is_prefetched class EnumField(serializers.ChoiceField): @@ -269,7 +269,9 @@ class ArchivedField(serializers.BooleanField): class DatasetSetsCountField(serializers.DictField): """ Serialize the number of element per set on a dataset. - This value can be disabled by setting `sets_count` to False in the context. + + This field is None, unless the sets have been prefetched + with a `element_count` annotation holding the number of elements per set. 
""" def __init__(self, **kwargs): @@ -281,16 +283,17 @@ class DatasetSetsCountField(serializers.DictField): ) def get_attribute(self, instance): - if not self.context.get("sets_count", True): + # Skip this field if sets are not prefetched, or if they are missing a count + if ( + not is_prefetched(instance.sets) + or not all(hasattr(set, "element_count") for set in instance.sets.all()) + ): return None - elts_count = {k: 0 for k in instance.sets} - elts_count.update( - instance.dataset_elements - .values("set") - .annotate(count=Count("id")) - .values_list("set", "count") - ) - return elts_count + + return { + set.name: set.element_count + for set in instance.sets.all() + } class NullField(serializers.CharField): diff --git a/arkindex/project/tools.py b/arkindex/project/tools.py index 72fde5b89d017d4f47ccafbfd2c69de69392281d..74bc0dd81f3629a722797d88824f3a0d7caa5695 100644 --- a/arkindex/project/tools.py +++ b/arkindex/project/tools.py @@ -188,3 +188,28 @@ def fake_now(): Fake creation date for fixtures and test objects """ return datetime(2020, 2, 2, 1, 23, 45, 678000, tzinfo=timezone.utc) + + +def is_prefetched(related_manager) -> bool: + """ + Determines whether the related items for a reverse foreign key have been prefetched; + that is, if calling `instance.things.all()` will not cause an SQL query. + Usage: `is_prefetched(instance.things)` + """ + return ( + related_manager.field.remote_field.get_cache_name() + in getattr(related_manager.instance, "_prefetched_objects_cache", {}) + ) + + +def add_as_prefetch(related_manager, items) -> None: + """ + Manually set a list of related items on an instance, as if they were actually prefetched from the database. + Usage: `add_as_prefetch(instance.things, [thing1, thing2])` + """ + assert ( + isinstance(items, list) and all(isinstance(item, related_manager.model) for item in items) + ), f"Prefetched items should be a list of {related_manager.model} instances." 
+ cache = getattr(related_manager.instance, "_prefetched_objects_cache", {}) + cache[related_manager.field.remote_field.get_cache_name()] = items + related_manager.instance._prefetched_objects_cache = cache diff --git a/arkindex/sql_validation/corpus_delete.sql b/arkindex/sql_validation/corpus_delete.sql index a454d05cdbb69ded46a1d1f175a0af1d2f630b59..766566825d6557a396a95ec21749ab734761f9b4 100644 --- a/arkindex/sql_validation/corpus_delete.sql +++ b/arkindex/sql_validation/corpus_delete.sql @@ -185,6 +185,15 @@ FROM "training_datasetelement" WHERE "training_datasetelement"."id" IN (SELECT U0."id" FROM "training_datasetelement" U0 + INNER JOIN "training_datasetset" U1 ON (U0."set_id" = U1."id") + INNER JOIN "training_dataset" U2 ON (U1."dataset_id" = U2."id") + WHERE U2."corpus_id" = '{corpus_id}'::uuid); + +DELETE +FROM "training_datasetset" +WHERE "training_datasetset"."id" IN + (SELECT U0."id" + FROM "training_datasetset" U0 INNER JOIN "training_dataset" U1 ON (U0."dataset_id" = U1."id") WHERE U1."corpus_id" = '{corpus_id}'::uuid); diff --git a/arkindex/sql_validation/corpus_delete_top_level_type.sql b/arkindex/sql_validation/corpus_delete_top_level_type.sql index 9c7ae8cd60709aa0bd672e1f72dbb020aea86153..d64cf0bb8b2eaabeb9a56b71612ba54223d8f0d2 100644 --- a/arkindex/sql_validation/corpus_delete_top_level_type.sql +++ b/arkindex/sql_validation/corpus_delete_top_level_type.sql @@ -189,8 +189,17 @@ FROM "training_datasetelement" WHERE "training_datasetelement"."id" IN (SELECT U0."id" FROM "training_datasetelement" U0 - INNER JOIN "training_dataset" U1 ON (U0."dataset_id" = U1."id") - WHERE U1."corpus_id" = '{corpus_id}'::uuid); + INNER JOIN "training_datasetset" U1 ON (U0."set_id" = U1."id") + INNER JOIN "training_dataset" U2 ON (U1."dataset_id" = U2."id") + WHERE U2."corpus_id" = '{corpus_id}'::uuid); + +DELETE +FROM "training_datasetset" +WHERE "training_datasetset"."id" IN + (SELECT U0."id" + FROM "training_datasetset" U0 + INNER JOIN "training_dataset" U1 ON 
(U0."dataset_id" = U1."id") + WHERE U1."corpus_id" = '{corpus_id}'::uuid); DELETE FROM "training_dataset" diff --git a/arkindex/training/admin.py b/arkindex/training/admin.py index 858f40be148e6249995772cd0794dc88aa4052d2..66f3626575649c6e4a5616165d4a5073079ba93f 100644 --- a/arkindex/training/admin.py +++ b/arkindex/training/admin.py @@ -2,7 +2,7 @@ from django.contrib import admin from enumfields.admin import EnumFieldListFilter from arkindex.project.admin import ArchivedListFilter -from arkindex.training.models import Dataset, MetricKey, MetricValue, Model, ModelVersion +from arkindex.training.models import Dataset, DatasetSet, MetricKey, MetricValue, Model, ModelVersion class ModelAdmin(admin.ModelAdmin): @@ -31,21 +31,17 @@ class MetricKeyAdmin(admin.ModelAdmin): inlines = [MetricValueInline, ] +class DatasetSetInLine(admin.StackedInline): + model = DatasetSet + + class DatasetAdmin(admin.ModelAdmin): list_display = ("name", "corpus", "state") list_filter = (("state", EnumFieldListFilter), "corpus") search_fields = ("name", "description") - fields = ("id", "name", "created", "updated", "description", "corpus", "creator", "task", "sets") + fields = ("id", "name", "created", "updated", "description", "corpus", "creator", "task") readonly_fields = ("id", "created", "updated", "task") - - def get_form(self, *args, **kwargs): - form = super().get_form(*args, **kwargs) - # Add a help text to mention that the set names should be comma-separated. - # This is only done here and not through the usual `help_text=…` in the model - # because this is only relevant to the Django admin and should not appear in - # DRF serializers or the API docs. 
- form.base_fields["sets"].help_text = "Comma-separated list of set names" - return form + inlines = [DatasetSetInLine, ] admin.site.register(Model, ModelAdmin) diff --git a/arkindex/training/api.py b/arkindex/training/api.py index 353dc6e9286760c278d8895c12e6f5d9bae11135..a2b21048eb3ff3dc8be2aba8b6ce2c03a9868ece 100644 --- a/arkindex/training/api.py +++ b/arkindex/training/api.py @@ -3,7 +3,7 @@ from textwrap import dedent from uuid import UUID from django.db import connection, transaction -from django.db.models import Q +from django.db.models import Count, Prefetch, Q, prefetch_related_objects from django.shortcuts import get_object_or_404 from django.utils.functional import cached_property from drf_spectacular.utils import OpenApiParameter, extend_schema, extend_schema_view @@ -29,6 +29,7 @@ from arkindex.project.tools import BulkMap from arkindex.training.models import ( Dataset, DatasetElement, + DatasetSet, DatasetState, MetricValue, Model, @@ -40,7 +41,7 @@ from arkindex.training.serializers import ( DatasetElementInfoSerializer, DatasetElementSerializer, DatasetSerializer, - ElementDatasetSerializer, + ElementDatasetSetSerializer, MetricValueBulkSerializer, MetricValueCreateSerializer, ModelCompatibleWorkerSerializer, @@ -53,12 +54,18 @@ from arkindex.training.serializers import ( from arkindex.users.models import Role from arkindex.users.utils import get_max_level +# A prefetch object that includes the number of elements per set. +DATASET_SET_COUNTS_PREFETCH = Prefetch( + "sets", + DatasetSet.objects.annotate(element_count=Count("set_elements")).order_by("name") +) + def _fetch_datasetelement_neighbors(datasetelements): """ Retrieve the neighbors for a list of DatasetElements, and annotate these DatasetElements with next and previous attributes. 
- The ElementDatasets endpoint uses arkindex.project.tools.BulkMap to apply this method and + The ElementDatasetSets endpoint uses arkindex.project.tools.BulkMap to apply this method and perform the second request *after* DRF's pagination, because there is no way to perform post-processing after pagination in Django without having to use Django private methods. """ @@ -71,18 +78,18 @@ def _fetch_datasetelement_neighbors(datasetelements): SELECT n.id, lag(element_id) OVER ( - partition BY (n.dataset_id, n.set) + partition BY (n.set_id) order by n.element_id ) as previous, lead(element_id) OVER ( - partition BY (n.dataset_id, n.set) + partition BY (n.set_id) order by n.element_id ) as next FROM training_datasetelement as n - WHERE (dataset_id, set) IN ( - SELECT dataset_id, set + WHERE set_id IN ( + SELECT set_id FROM training_datasetelement WHERE id IN %(ids)s ) @@ -609,6 +616,11 @@ class CorpusDataset(CorpusACLMixin, ListCreateAPIView): def get_queryset(self): return Dataset.objects \ .select_related("creator") \ + .prefetch_related(Prefetch( + "sets", + # Prefetch sets, but ensure they are ordered by name + DatasetSet.objects.order_by("name") + )) \ .filter(corpus=self.corpus) \ .order_by("name") @@ -625,10 +637,6 @@ class CorpusDataset(CorpusACLMixin, ListCreateAPIView): if not self.kwargs: return context context["corpus"] = self.corpus - # Avoids aggregating the number of elements per set on each - # entry, which would cause 1 extra query per dataset - if self.request.method in permissions.SAFE_METHODS: - context["sets_count"] = False return context @@ -686,8 +694,14 @@ class DatasetUpdate(ACLMixin, RetrieveUpdateDestroyAPIView): serializer_class = DatasetSerializer def get_queryset(self): - queryset = Dataset.objects.filter(corpus__in=Corpus.objects.readable(self.request.user)) - return queryset.select_related("corpus", "creator") + queryset = ( + Dataset.objects + .filter(corpus__in=Corpus.objects.readable(self.request.user)) + .select_related("corpus", 
"creator") + ) + if self.request.method != "DELETE": + queryset = queryset.prefetch_related(DATASET_SET_COUNTS_PREFETCH) + return queryset def check_object_permissions(self, request, obj): super().check_object_permissions(request, obj) @@ -706,8 +720,20 @@ class DatasetUpdate(ACLMixin, RetrieveUpdateDestroyAPIView): if obj.state == DatasetState.Complete: raise ValidationError(detail="This dataset is in complete state and cannot be modified anymore.") + def update(self, request, *args, **kwargs): + # Do exactly the same thing as what DRF does, but without the automatic prefetch cache removal: + # https://github.com/encode/django-rest-framework/blob/2da473c8c8e024e80c13a624782f1da6272812da/rest_framework/mixins.py#L70 + # This allows `set_elements` to still be returned after the update. + partial = kwargs.pop("partial", False) + instance = self.get_object() + serializer = self.get_serializer(instance, data=request.data, partial=partial) + serializer.is_valid(raise_exception=True) + self.perform_update(serializer) + return Response(serializer.data) + def perform_destroy(self, dataset): - dataset.dataset_elements.all().delete() + DatasetElement.objects.filter(set__dataset_id=dataset.id).delete() + dataset.sets.all().delete() super().perform_destroy(dataset) @@ -768,13 +794,13 @@ class DatasetElements(CorpusACLMixin, ListCreateAPIView): def get_queryset(self): qs = ( - self.dataset.dataset_elements - .prefetch_related("element") + DatasetElement.objects.filter(set__dataset_id=self.dataset.id) + .prefetch_related("element", "set") .select_related("element__type", "element__corpus", "element__image__server") .order_by("element_id", "id") ) if "set" in self.request.query_params: - qs = qs.filter(set=self.request.query_params["set"]) + qs = qs.filter(set__name=self.request.query_params["set"]) return qs def get_serializer_context(self): @@ -800,7 +826,7 @@ class DatasetElements(CorpusACLMixin, ListCreateAPIView): ) class DatasetElementDestroy(CorpusACLMixin, 
DestroyAPIView): """ - Remove an element from a dataset. + Remove an element from a dataset set. Elements can only be removed from **open** datasets. @@ -811,17 +837,15 @@ class DatasetElementDestroy(CorpusACLMixin, DestroyAPIView): lookup_url_kwarg = "element" def destroy(self, request, *args, **kwargs): - if not self.request.query_params.get("set"): + if not (set_name := self.request.query_params.get("set")): raise ValidationError({"set": ["This field is required."]}) dataset_element = get_object_or_404( - DatasetElement.objects.select_related("dataset__corpus"), - dataset_id=self.kwargs["dataset"], - element_id=self.kwargs["element"], - set=self.request.query_params.get("set") + DatasetElement.objects.select_related("set__dataset__corpus").filter(set__dataset_id=self.kwargs["dataset"], set__name=set_name), + element_id=self.kwargs["element"] ) - if dataset_element.dataset.state != DatasetState.Open: + if dataset_element.set.dataset.state != DatasetState.Open: raise ValidationError({"dataset": ["Elements can only be removed from open Datasets."]}) - if not self.has_write_access(dataset_element.dataset.corpus): + if not self.has_write_access(dataset_element.set.dataset.corpus): raise PermissionDenied(detail="You need a Contributor access to the dataset to perform this action.") dataset_element.delete() return Response(status=status.HTTP_204_NO_CONTENT) @@ -896,14 +920,14 @@ class CreateDatasetElementsSelection(CorpusACLMixin, CreateAPIView): ) ], ) -class ElementDatasets(CorpusACLMixin, ListAPIView): +class ElementDatasetSets(CorpusACLMixin, ListAPIView): """ - List all datasets containing a specific element. + List all dataset sets containing a specific element. Requires a **guest** access to the element's corpus. 
""" permission_classes = (IsVerifiedOrReadOnly, ) - serializer_class = ElementDatasetSerializer + serializer_class = ElementDatasetSetSerializer @cached_property def element(self): @@ -915,9 +939,14 @@ class ElementDatasets(CorpusACLMixin, ListAPIView): def get_queryset(self): qs = ( - self.element.dataset_elements.all() - .select_related("dataset__creator") - .order_by("dataset__name", "set", "dataset_id") + self.element.dataset_elements + .select_related("set__dataset__creator") + .prefetch_related(Prefetch( + "set__dataset__sets", + # Prefetch sets, but ensure they are ordered by name + DatasetSet.objects.order_by("name") + )) + .order_by("set__dataset__name", "set__name") ) with_neighbors = self.request.query_params.get("with_neighbors", "false") @@ -926,13 +955,6 @@ class ElementDatasets(CorpusACLMixin, ListAPIView): return qs - def get_serializer_context(self): - context = super().get_serializer_context() - # Avoids aggregating the number of elements per set on each - # entry, which would cause 1 extra query per dataset - context["sets_count"] = False - return context - @extend_schema_view( post=extend_schema( @@ -995,11 +1017,24 @@ class DatasetClone(CorpusACLMixin, CreateAPIView): clone.creator = request.user clone.save() + # Clone dataset sets + cloned_sets = DatasetSet.objects.bulk_create([ + DatasetSet(dataset_id=clone.id, name=set.name) + for set in dataset.sets.all() + ]) + set_map = {set.name: set for set in cloned_sets} + # Associate all elements to the clone DatasetElement.objects.bulk_create([ - DatasetElement(element_id=elt_id, dataset_id=clone.id, set=set_name) - for elt_id, set_name in dataset.dataset_elements.values_list("element_id", "set") + DatasetElement(element_id=elt_id, set=set_map[set_name]) + for elt_id, set_name in DatasetElement.objects.filter(set__dataset_id=dataset.id) + .values_list("element_id", "set__name") + .iterator() ]) + + # Add the set counts to the API response + prefetch_related_objects([clone], 
DATASET_SET_COUNTS_PREFETCH) + return Response( DatasetSerializer(clone).data, status=status.HTTP_201_CREATED, diff --git a/arkindex/training/migrations/0001_initial.py b/arkindex/training/migrations/0001_initial.py index 154f97adfd7e8f83f0e3b66120988d07864f1f6d..ba280c7d4a06e0b06710c25a9dc3284d10aef9ba 100644 --- a/arkindex/training/migrations/0001_initial.py +++ b/arkindex/training/migrations/0001_initial.py @@ -8,9 +8,13 @@ import django.db.models.deletion import enumfields.fields from django.db import migrations, models +import arkindex.process.models import arkindex.project.aws import arkindex.project.fields -import arkindex.training.models + + +def default_sets(): + return ["training", "test", "validation"] class Migration(migrations.Migration): @@ -32,7 +36,7 @@ class Migration(migrations.Migration): ("name", models.CharField(max_length=100, validators=[django.core.validators.MinLengthValidator(1)])), ("description", models.TextField(validators=[django.core.validators.MinLengthValidator(1)])), ("state", enumfields.fields.EnumField(default="open", enum=arkindex.training.models.DatasetState, max_length=10)), - ("sets", django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=50, validators=[django.core.validators.MinLengthValidator(1)]), default=arkindex.training.models.default_sets, size=None, validators=[django.core.validators.MinLengthValidator(1), arkindex.training.models.validate_unique_set_names])), + ("sets", django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=50, validators=[django.core.validators.MinLengthValidator(1)]), default=default_sets, size=None, validators=[django.core.validators.MinLengthValidator(1), arkindex.process.models.validate_unique_set_names])), ("corpus", models.ForeignKey(on_delete=django.db.models.deletion.DO_NOTHING, related_name="datasets", to="documents.corpus")), ], ), diff --git a/arkindex/training/migrations/0007_datasetset_model.py 
b/arkindex/training/migrations/0007_datasetset_model.py new file mode 100644 index 0000000000000000000000000000000000000000..7f21a630dc4b22f6cca6c3d83c32ab867b66f0d4 --- /dev/null +++ b/arkindex/training/migrations/0007_datasetset_model.py @@ -0,0 +1,132 @@ +# Generated by Django 4.1.7 on 2024-03-05 16:28 + +import uuid + +import django.core.validators +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("training", "0006_model_archived"), + ("process", "0029_processdataset_sets"), + ] + + operations = [ + migrations.CreateModel( + name="DatasetSet", + fields=[ + ("id", models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)), + ("name", models.CharField(max_length=50, validators=[django.core.validators.MinLengthValidator(1)])), + ("dataset", models.ForeignKey(on_delete=django.db.models.deletion.DO_NOTHING, related_name="tmp_sets", to="training.dataset")), + ], + ), + migrations.AddConstraint( + model_name="datasetset", + constraint=models.UniqueConstraint(fields=("dataset", "name"), name="unique_dataset_sets"), + ), + # Make the old set name and dataset ID fields nullable + # so that they can be filled in when rolling the migration back + migrations.AlterField( + model_name="datasetelement", + name="set", + field=models.CharField(max_length=50, null=True), + ), + migrations.AlterField( + model_name="datasetelement", + name="dataset", + field=models.ForeignKey(on_delete=django.db.models.deletion.DO_NOTHING, related_name="elements", to="training.dataset", null=True), + ), + migrations.AddField( + model_name="datasetelement", + # Temporary name, because the `set` column already existed as the set name. + # This is referred to as `set_id_id` in the SQL migration, + # and renamed to `set` afterwards. 
+ name="set_id", + field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name="dataset_elements", to="training.datasetset"), + ), + migrations.RunSQL( + [ + """ + INSERT INTO training_datasetset (id, dataset_id, name) + SELECT uuid_generate_v4(), ds.id, ds.set + FROM ( + SELECT DISTINCT id, unnest(sets) AS set + FROM training_dataset + ) ds + """, + """ + INSERT INTO training_datasetset (id, dataset_id, name) + SELECT uuid_generate_v4(), sets.dataset_id, sets.set + FROM ( + SELECT DISTINCT dataset_id, set + FROM training_datasetelement + ) sets + ON CONFLICT DO NOTHING + """, + """ + UPDATE training_datasetelement de + SET set_id_id = ds.id + FROM training_datasetset ds + WHERE de.dataset_id = ds.dataset_id AND de.set = ds.name + """, + ], + reverse_sql=[ + """ + UPDATE training_dataset + SET sets = ARRAY( + SELECT name + FROM training_datasetset + WHERE dataset_id = training_dataset.id + ) + """, + """ + UPDATE training_datasetelement de + SET dataset_id = ds.dataset_id, set = ds.name + FROM training_datasetset ds + WHERE ds.id = de.set_id_id + """, + ], + ), + migrations.RemoveConstraint( + model_name="datasetelement", + name="unique_dataset_elements", + ), + migrations.RemoveField( + model_name="datasetelement", + name="dataset" + ), + migrations.RemoveField( + model_name="datasetelement", + name="set" + ), + migrations.RenameField( + model_name="datasetelement", + old_name="set_id", + new_name="set" + ), + migrations.AlterField( + model_name="datasetelement", + name="set", + field=models.ForeignKey(on_delete=django.db.models.deletion.DO_NOTHING, related_name="set_elements", to="training.datasetset"), + ), + migrations.AddConstraint( + model_name="datasetelement", + constraint=models.UniqueConstraint(fields=("element_id", "set_id"), name="unique_set_element"), + ), + migrations.RemoveField( + model_name="dataset", + name="sets" + ), + migrations.RemoveField( + model_name="dataset", + name="elements", + ), + 
migrations.AlterField( + model_name="datasetset", + name="dataset", + field=models.ForeignKey(on_delete=django.db.models.deletion.DO_NOTHING, related_name="sets", to="training.dataset"), + ), + ] diff --git a/arkindex/training/models.py b/arkindex/training/models.py index adf03e819371768f770960fad9b778e44ba5b27c..c37ea35b09193f087793b2c48c194cc5f3c64e48 100644 --- a/arkindex/training/models.py +++ b/arkindex/training/models.py @@ -4,8 +4,6 @@ from hashlib import sha256 from django.conf import settings from django.contrib.contenttypes.fields import GenericRelation -from django.contrib.postgres.fields import ArrayField -from django.core.exceptions import ValidationError from django.core.validators import MinLengthValidator from django.db import models from django.db.models import Q @@ -242,15 +240,6 @@ class DatasetState(Enum): Error = "error" -def validate_unique_set_names(sets): - if len(set(sets)) != len(sets): - raise ValidationError("Set names must be unique.") - - -def default_sets(): - return ["training", "test", "validation"] - - class Dataset(models.Model): id = models.UUIDField(default=uuid.uuid4, primary_key=True, editable=False) created = models.DateTimeField(auto_now_add=True) @@ -278,21 +267,6 @@ class Dataset(models.Model): description = models.TextField(validators=[MinLengthValidator(1)]) state = EnumField(DatasetState, default=DatasetState.Open, max_length=50) - sets = ArrayField( - models.CharField(max_length=50, validators=[MinLengthValidator(1)]), - validators=[ - MinLengthValidator(1), - validate_unique_set_names, - ], - default=default_sets, - ) - - elements = models.ManyToManyField( - "documents.Element", - through="training.DatasetElement", - related_name="datasets", - ) - class Meta: constraints = [ models.UniqueConstraint( @@ -305,24 +279,41 @@ class Dataset(models.Model): return self.name -class DatasetElement(models.Model): +class DatasetSet(models.Model): id = models.UUIDField(default=uuid.uuid4, primary_key=True, editable=False) + name = 
models.CharField(max_length=50, validators=[MinLengthValidator(1)]) dataset = models.ForeignKey( Dataset, - related_name="dataset_elements", + related_name="sets", on_delete=models.DO_NOTHING, ) + + class Meta: + constraints = [ + models.UniqueConstraint( + fields=["dataset", "name"], + name="unique_dataset_sets", + ), + ] + + +class DatasetElement(models.Model): + id = models.UUIDField(default=uuid.uuid4, primary_key=True, editable=False) element = models.ForeignKey( "documents.Element", related_name="dataset_elements", on_delete=models.DO_NOTHING, ) - set = models.CharField(max_length=50, validators=[MinLengthValidator(1)]) + set = models.ForeignKey( + DatasetSet, + related_name="set_elements", + on_delete=models.DO_NOTHING, + ) class Meta: constraints = [ models.UniqueConstraint( - fields=["dataset", "element", "set"], - name="unique_dataset_elements", + fields=["element_id", "set_id"], + name="unique_set_element", ), ] diff --git a/arkindex/training/serializers.py b/arkindex/training/serializers.py index ab1e300b586aceb1a7a6761b169349cac2524e12..85b11b1eee1620690a121675a7c41021b8a3ee42 100644 --- a/arkindex/training/serializers.py +++ b/arkindex/training/serializers.py @@ -6,18 +6,20 @@ from textwrap import dedent from django.db import transaction from django.db.models import Count, Q from drf_spectacular.utils import extend_schema_field -from rest_framework import serializers +from rest_framework import permissions, serializers from rest_framework.exceptions import PermissionDenied, ValidationError from rest_framework.validators import UniqueTogetherValidator -from arkindex.documents.models import Element +from arkindex.documents.models import Corpus, Element from arkindex.documents.serializers.elements import ElementListSerializer from arkindex.ponos.models import Task -from arkindex.process.models import ProcessDataset, Worker +from arkindex.process.models import Worker from arkindex.project.serializer_fields import ArchivedField, DatasetSetsCountField, 
EnumField +from arkindex.project.tools import add_as_prefetch from arkindex.training.models import ( Dataset, DatasetElement, + DatasetSet, DatasetState, MetricKey, MetricMode, @@ -479,6 +481,12 @@ class MetricValueBulkSerializer(serializers.Serializer): return validated_data +class DatasetSetSerializer(serializers.ModelSerializer): + class Meta: + model = DatasetSet + fields = ("id", "name",) + + class DatasetSerializer(serializers.ModelSerializer): state = EnumField( DatasetState, @@ -505,6 +513,13 @@ class DatasetSerializer(serializers.ModelSerializer): help_text="Display name of the user who created the dataset.", ) + set_names = serializers.ListField( + child=serializers.CharField(max_length=50), + write_only=True, + default=serializers.CreateOnlyDefault(["training", "validation", "test"]) + ) + sets = DatasetSetSerializer(many=True, read_only=True) + # When creating the dataset, the dataset's corpus comes from the URL, so the APIView passes it through corpus = serializers.HiddenField(default=_corpus_from_context) @@ -517,16 +532,6 @@ class DatasetSerializer(serializers.ModelSerializer): help_text="Distribution of elements in sets. This value is set to null when listing multiple datasets.", ) - def sets_diff(self, new_sets): - """ - Returns a list of removed sets, and a list of added sets for updates - """ - if not isinstance(self.instance, Dataset): - return [], new_sets - current_sets = set(self.instance.sets) - new_sets = set(new_sets) - return list(current_sets - new_sets), list(new_sets - current_sets) - def validate_state(self, state): """ Dataset's state update is limited to these transitions: @@ -550,27 +555,14 @@ class DatasetSerializer(serializers.ModelSerializer): raise ValidationError(f"Transition from {self.instance.state} to {state} is not allowed.") return state - def validate_sets(self, sets): - """ - Ensure sets are updated in a comprehensible way. - It is either possible to add multiple sets, - remove multiple sets or update a single set. 
- """ - if sets is not None and len(set(sets)) != len(sets): + def validate_set_names(self, set_names): + if self.context["request"].method not in permissions.SAFE_METHODS and self.context["request"].method != "POST": + raise ValidationError("This API endpoint does not allow updating a dataset's sets.") + if set_names is not None and len(set(set_names)) != len(set_names): raise ValidationError("Set names must be unique.") - - removed, added = self.sets_diff(sets) - if removed and ProcessDataset.objects.filter(sets__overlap=removed, dataset_id=self.instance.id).exists(): - # Sets that are used in a ProcessDataset cannot be renamed or deleted - raise ValidationError("These sets cannot be updated because one or more are selected in a dataset process.") - if not removed or not added: - # Some sets have either been added or removed, but not both; do nothing - return sets - elif len(removed) == 1 and len(added) == 1: - # A single set has been renamed. Move its elements later, while performing the update - return sets - else: - raise ValidationError("Updating those sets is ambiguous because several have changed.") + if set_names is not None and len(set_names) == 0: + raise ValidationError("Either do not specify set names to use the default values, or specify a non-empty list of names.") + return set_names def validate(self, data): data = super().validate(data) @@ -598,13 +590,22 @@ class DatasetSerializer(serializers.ModelSerializer): return data - @transaction.atomic() - def update(self, instance, validated_data): - removed, added = self.sets_diff(validated_data.get("sets", self.instance.sets)) - if len(removed) == 1 and len(added) == 1: - set_from, set_to = *removed, *added - instance.dataset_elements.filter(set=set_from).update(set=set_to) - return super().update(instance, validated_data) + @transaction.atomic + def create(self, validated_data): + set_names = validated_data.pop("set_names") + dataset = Dataset.objects.create(**validated_data) + sets = 
DatasetSet.objects.bulk_create( + DatasetSet( + name=set_name, + dataset_id=dataset.id + ) for set_name in sorted(set_names) + ) + # We will output set element counts in the API, but we know there are zero, + # so no need to make another query to prefetch the sets and count them + for set in sets: + set.element_count = 0 + add_as_prefetch(dataset.sets, sets) + return dataset class Meta: model = Dataset @@ -613,6 +614,7 @@ class DatasetSerializer(serializers.ModelSerializer): "name", "description", "sets", + "set_names", "set_elements", "state", # Only the corpus ID is actually serialized @@ -647,13 +649,7 @@ class DatasetSerializer(serializers.ModelSerializer): "sets": { "error_messages": { "empty": "Either do not specify set names to use the default values, or specify a non-empty list of names." - }, - "help_text": dedent( - """ - Updating the sets array must either add or remove sets (in this case nothing specific is done), - or rename a single set within the array (all elements linked to the previous set will be moved). 
- """ - ).strip(), + } } } @@ -673,6 +669,7 @@ class DatasetElementSerializer(serializers.ModelSerializer): default=_dataset_from_context, write_only=True, ) + set = serializers.SlugRelatedField(queryset=DatasetSet.objects.none(), slug_field="name") class Meta: model = DatasetElement @@ -681,7 +678,7 @@ class DatasetElementSerializer(serializers.ModelSerializer): validators = [ UniqueTogetherValidator( queryset=DatasetElement.objects.all(), - fields=["dataset", "element_id", "set"], + fields=["element_id", "set"], message="This element is already part of this set.", ) ] @@ -690,13 +687,12 @@ class DatasetElementSerializer(serializers.ModelSerializer): super().__init__(*args, **kwargs) if dataset := self.context.get("dataset"): self.fields["element_id"].queryset = Element.objects.filter(corpus=dataset.corpus) + self.fields["set"].queryset = dataset.sets.all() - def validate_set(self, value): - # The set must match the `sets` array defined at the dataset level - dataset = self.context["dataset"] - if dataset and value not in dataset.sets: - raise ValidationError(f"This dataset has no set named {value}.") - return value + def validate(self, data): + data = super().validate(data) + data.pop("dataset") + return data class DatasetElementInfoSerializer(DatasetElementSerializer): @@ -714,10 +710,11 @@ class DatasetElementInfoSerializer(DatasetElementSerializer): fields = DatasetElementSerializer.Meta.fields + ("dataset",) -class ElementDatasetSerializer(serializers.ModelSerializer): - dataset = DatasetSerializer() +class ElementDatasetSetSerializer(serializers.ModelSerializer): + dataset = DatasetSerializer(source="set.dataset") previous = serializers.UUIDField(allow_null=True, read_only=True) next = serializers.UUIDField(allow_null=True, read_only=True) + set = serializers.SlugRelatedField(slug_field="name", read_only=True) class Meta: model = DatasetElement @@ -726,35 +723,32 @@ class ElementDatasetSerializer(serializers.ModelSerializer): class 
SelectionDatasetElementSerializer(serializers.Serializer): - dataset_id = serializers.PrimaryKeyRelatedField( - queryset=Dataset.objects.all(), - source="dataset", + set_id = serializers.PrimaryKeyRelatedField( + queryset=DatasetSet.objects.none(), + source="set", write_only=True, - help_text="UUID of a dataset to add elements from your corpus' selection.", + help_text="UUID of a dataset set the elements will be added to.", style={"base_template": "input.html"}, ) - set = serializers.CharField( - max_length=50, - write_only=True, - help_text="Name of the set elements will be added to.", - ) - def validate_dataset_id(self, dataset): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # for openAPI schema generation + if "request" not in self.context: + return + self.fields["set_id"].queryset = DatasetSet.objects.filter( + dataset__corpus_id__in=Corpus.objects.readable(self.context["request"].user) + ).select_related("dataset") + + def validate_set_id(self, set): if ( (corpus := self.context.get("corpus")) - and dataset.corpus_id != corpus.id + and set.dataset.corpus_id != corpus.id ): - raise ValidationError(f"Dataset {dataset.id} is not part of corpus {corpus.name}.") - if dataset.state == DatasetState.Complete: - raise ValidationError(f"Dataset {dataset.id} is marked as completed.") - return dataset - - def validate(self, data): - data = super().validate(data) - dataset = data["dataset"] - if data["set"] not in dataset.sets: - raise ValidationError({"set": [f'This dataset only allows one of {", ".join(dataset.sets)}.']}) - return data + raise ValidationError(f"Dataset {set.dataset.id} is not part of corpus {corpus.name}.") + if set.dataset.state == DatasetState.Complete: + raise ValidationError(f"Dataset {set.dataset.id} is marked as completed.") + return set def create(self, validated_data): user = self.context["request"].user diff --git a/arkindex/training/tests/test_datasets_api.py b/arkindex/training/tests/test_datasets_api.py index 
3cba65910c3166cf9a63ee879984fc438190539b..e7b54da3441a538fa42805aaabe0da6086aa38e2 100644 --- a/arkindex/training/tests/test_datasets_api.py +++ b/arkindex/training/tests/test_datasets_api.py @@ -9,7 +9,7 @@ from arkindex.documents.models import Corpus from arkindex.process.models import Process, ProcessDataset, ProcessMode from arkindex.project.tests import FixtureAPITestCase from arkindex.project.tools import fake_now -from arkindex.training.models import Dataset, DatasetState +from arkindex.training.models import Dataset, DatasetElement, DatasetSet, DatasetState from arkindex.users.models import Role, User # Using the fake DB fixtures creation date when needed @@ -33,6 +33,7 @@ class TestDatasetsAPI(FixtureAPITestCase): ProcessDataset.objects.create(process=cls.process, dataset=cls.dataset, sets=["training", "test", "validation"]) ProcessDataset.objects.create(process=cls.process, dataset=cls.dataset2, sets=["test"]) cls.private_dataset = Dataset.objects.create(name="Private Dataset", description="Dead Sea Scrolls", corpus=cls.private_corpus, creator=cls.dataset_creator) + cls.private_dataset_set = DatasetSet.objects.create(dataset=cls.private_dataset, name="Private set") cls.vol = cls.corpus.elements.get(name="Volume 1") cls.page1 = cls.corpus.elements.get(name="Volume 1, page 1r") cls.page2 = cls.corpus.elements.get(name="Volume 1, page 1v") @@ -75,7 +76,7 @@ class TestDatasetsAPI(FixtureAPITestCase): def test_list(self): self.client.force_login(self.user) - with self.assertNumQueries(5): + with self.assertNumQueries(6): response = self.client.get(reverse("api:corpus-datasets", kwargs={"pk": self.corpus.pk})) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response.json()["results"], [ @@ -83,7 +84,13 @@ class TestDatasetsAPI(FixtureAPITestCase): "name": "First Dataset", "description": "dataset number one", "id": str(self.dataset.pk), - "sets": ["training", "test", "validation"], + "sets": [ + { + "id": str(ds.id), + "name": ds.name 
+ } + for ds in self.dataset.sets.order_by("name") + ], "set_elements": None, "state": "open", "creator": "Test user", @@ -96,7 +103,13 @@ class TestDatasetsAPI(FixtureAPITestCase): "name": "Second Dataset", "description": "dataset number two", "id": str(self.dataset2.pk), - "sets": ["training", "test", "validation"], + "sets": [ + { + "id": str(ds.id), + "name": ds.name + } + for ds in self.dataset2.sets.order_by("name") + ], "set_elements": None, "state": "open", "creator": "Test user", @@ -254,7 +267,7 @@ class TestDatasetsAPI(FixtureAPITestCase): def test_create(self): self.client.force_login(self.user) - with self.assertNumQueries(6): + with self.assertNumQueries(8): response = self.client.post( reverse("api:corpus-datasets", kwargs={"pk": self.corpus.pk}), data={"name": "My dataset", "description": "My dataset for my experiments."}, @@ -266,7 +279,13 @@ class TestDatasetsAPI(FixtureAPITestCase): "id": str(created_dataset.id), "name": "My dataset", "description": "My dataset for my experiments.", - "sets": ["training", "test", "validation"], + "sets": [ + { + "id": str(ds.id), + "name": ds.name + } + for ds in created_dataset.sets.order_by("name") + ], "set_elements": { "training": 0, "test": 0, @@ -282,7 +301,7 @@ class TestDatasetsAPI(FixtureAPITestCase): def test_create_state_ignored(self): self.client.force_login(self.user) - with self.assertNumQueries(6): + with self.assertNumQueries(8): response = self.client.post( reverse("api:corpus-datasets", kwargs={"pk": self.corpus.pk}), data={"name": "My dataset", "description": "My dataset for my experiments.", "state": "complete"}, @@ -295,8 +314,18 @@ class TestDatasetsAPI(FixtureAPITestCase): "id": str(created_dataset.id), "name": "My dataset", "description": "My dataset for my experiments.", - "sets": ["training", "test", "validation"], - "set_elements": {"test": 0, "training": 0, "validation": 0}, + "sets": [ + { + "id": str(ds.id), + "name": ds.name + } + for ds in created_dataset.sets.order_by("name") + ], 
+ "set_elements": { + "training": 0, + "test": 0, + "validation": 0, + }, "state": "open", "creator": "Test user", "task_id": None, @@ -307,10 +336,10 @@ class TestDatasetsAPI(FixtureAPITestCase): def test_create_sets(self): self.client.force_login(self.user) - with self.assertNumQueries(6): + with self.assertNumQueries(8): response = self.client.post( reverse("api:corpus-datasets", kwargs={"pk": self.corpus.pk}), - data={"name": "My dataset", "description": "My dataset for my experiments.", "sets": ["a", "b", "c", "d"]}, + data={"name": "My dataset", "description": "My dataset for my experiments.", "set_names": ["a", "b", "c", "d"]}, format="json" ) self.assertEqual(response.status_code, status.HTTP_201_CREATED) @@ -319,7 +348,13 @@ class TestDatasetsAPI(FixtureAPITestCase): "id": str(created_dataset.id), "name": "My dataset", "description": "My dataset for my experiments.", - "sets": ["a", "b", "c", "d"], + "sets": [ + { + "id": str(ds.id), + "name": ds.name + } + for ds in created_dataset.sets.order_by("name") + ], "set_elements": {"a": 0, "b": 0, "c": 0, "d": 0}, "state": "open", "creator": "Test user", @@ -329,39 +364,39 @@ class TestDatasetsAPI(FixtureAPITestCase): "updated": created_dataset.updated.isoformat().replace("+00:00", "Z"), }) - def test_create_sets_length(self): + def test_create_sets_empty_list(self): self.client.force_login(self.user) with self.assertNumQueries(3): response = self.client.post( reverse("api:corpus-datasets", kwargs={"pk": self.corpus.pk}), - data={"name": "My dataset", "description": "My dataset for my experiments.", "sets": []}, + data={"name": "My dataset", "description": "My dataset for my experiments.", "set_names": []}, format="json" ) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertDictEqual(response.json(), {"sets": ["Either do not specify set names to use the default values, or specify a non-empty list of names."]}) + self.assertDictEqual(response.json(), {"set_names": ["Either do not 
specify set names to use the default values, or specify a non-empty list of names."]}) def test_create_sets_unique_names(self): self.client.force_login(self.user) with self.assertNumQueries(3): response = self.client.post( reverse("api:corpus-datasets", kwargs={"pk": self.corpus.pk}), - data={"name": "My dataset", "description": "My dataset for my experiments.", "sets": ["a", "a", "b"]}, + data={"name": "My dataset", "description": "My dataset for my experiments.", "set_names": ["a", "a", "b"]}, format="json" ) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertDictEqual(response.json(), {"sets": ["Set names must be unique."]}) + self.assertDictEqual(response.json(), {"set_names": ["Set names must be unique."]}) def test_create_sets_blank_names(self): self.client.force_login(self.user) with self.assertNumQueries(3): response = self.client.post( reverse("api:corpus-datasets", kwargs={"pk": self.corpus.pk}), - data={"name": "My dataset", "description": "My dataset for my experiments.", "sets": [" ", " ", "b"]}, + data={"name": "My dataset", "description": "My dataset for my experiments.", "set_names": [" ", " ", "b"]}, format="json" ) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) self.assertDictEqual(response.json(), { - "sets": + "set_names": { "0": ["This field may not be blank."], "1": ["This field may not be blank."] @@ -376,13 +411,13 @@ class TestDatasetsAPI(FixtureAPITestCase): data={ "name": "My Dataset", "description": "My dataset for my experiments.", - "sets": ["unit-00", "Etiam accumsan ullamcorper mauris eget mattis. Ut porttitor."] + "set_names": ["unit-00", "Etiam accumsan ullamcorper mauris eget mattis. 
Ut porttitor."] }, format="json" ) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) self.assertDictEqual(response.json(), { - "sets": { + "set_names": { "1": ["Ensure this field has no more than 50 characters."] } }) @@ -396,7 +431,6 @@ class TestDatasetsAPI(FixtureAPITestCase): data={ "name": "Shin Seiki Evangelion", "description": "Omedeto!", - "sets": ["unit-01", "unit-00", "unit-02"], }, format="json" ) @@ -412,7 +446,6 @@ class TestDatasetsAPI(FixtureAPITestCase): data={ "name": "Shin Seiki Evangelion", "description": "Omedeto!", - "sets": ["unit-01", "unit-00", "unit-02"], }, format="json" ) @@ -422,13 +455,12 @@ class TestDatasetsAPI(FixtureAPITestCase): @patch("arkindex.project.mixins.has_access", return_value=False) def test_update_requires_write_corpus(self, has_access_mock): self.client.force_login(self.read_user) - with self.assertNumQueries(3): + with self.assertNumQueries(4): response = self.client.put( reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), data={ "name": "Shin Seiki Evangelion", "description": "Omedeto!", - "sets": ["unit-01", "unit-00", "unit-02"], }, format="json" ) @@ -446,7 +478,6 @@ class TestDatasetsAPI(FixtureAPITestCase): data={ "name": "Shin Seiki Evangelion", "description": "Omedeto!", - "sets": ["unit-01", "unit-00", "unit-02"], }, format="json" ) @@ -455,7 +486,7 @@ class TestDatasetsAPI(FixtureAPITestCase): def test_update_name_too_long(self): self.client.force_login(self.user) - with self.assertNumQueries(3): + with self.assertNumQueries(4): response = self.client.put( reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), data={ @@ -470,13 +501,12 @@ class TestDatasetsAPI(FixtureAPITestCase): def test_update_name_already_exists_in_corpus(self): Dataset.objects.create(name="Another Dataset", description="A set of data", corpus=self.corpus, creator=self.dataset_creator) self.client.force_login(self.user) - with self.assertNumQueries(4): + with self.assertNumQueries(5): response = 
self.client.put( reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), data={ "name": "Another Dataset", "description": "My dataset for my experiments.", - "sets": self.dataset.sets, }, format="json" ) @@ -485,7 +515,7 @@ class TestDatasetsAPI(FixtureAPITestCase): def test_update_requires_all_fields(self): self.client.force_login(self.user) - with self.assertNumQueries(3): + with self.assertNumQueries(4): response = self.client.put( reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), data={"name": "Shin Seiki Evangelion"}, @@ -494,228 +524,48 @@ class TestDatasetsAPI(FixtureAPITestCase): self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) self.assertDictEqual(response.json(), {"description": ["This field is required."]}) - def test_update_add_sets(self): - self.client.force_login(self.user) - self.assertIsNone(self.dataset.task_id) - with self.assertNumQueries(8): - response = self.client.put( - reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), - data={ - "name": "Shin Seiki Evangelion", - "description": "Omedeto!", - "sets": self.dataset.sets + ["unit-01", "unit-02"], - }, - format="json" - ) - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.dataset.refresh_from_db() - self.assertEqual(self.dataset.state, DatasetState.Open) - self.assertEqual(self.dataset.name, "Shin Seiki Evangelion") - self.assertEqual(self.dataset.description, "Omedeto!") - self.assertListEqual(self.dataset.sets, ["training", "test", "validation", "unit-01", "unit-02"]) - self.assertIsNone(self.dataset.task_id) - - def test_update_remove_sets(self): - """ - It is possible to remove many sets, no elements are moved - """ - # Remove ProcessDataset relation - ProcessDataset.objects.get(process=self.process, dataset=self.dataset).delete() - self.client.force_login(self.user) - dataset_elt = self.dataset.dataset_elements.create(element=self.page1, set="training") - with self.assertNumQueries(9): - response = self.client.put( - 
reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), - data={ - "name": "Shin Seiki Evangelion", - "description": "Omedeto!", - # train and validation sets are removed - "sets": ["test"], - }, - format="json" - ) - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.dataset.refresh_from_db() - self.assertEqual(self.dataset.state, DatasetState.Open) - self.assertEqual(self.dataset.name, "Shin Seiki Evangelion") - self.assertEqual(self.dataset.description, "Omedeto!") - self.assertListEqual(self.dataset.sets, ["test"]) - self.assertIsNone(self.dataset.task_id) - dataset_elt.refresh_from_db() - self.assertEqual(dataset_elt.element, self.page1) - self.assertEqual(dataset_elt.dataset, self.dataset) - # Element is still linked to a set that does not exist anymore on the dataset - self.assertEqual(dataset_elt.set, "training") - - def test_update_sets_update_single_set(self): - """ - It is possible to rename a single set, if it is not referenced by a ProcessDataset - """ - ProcessDataset.objects.get(process=self.process, dataset=self.dataset, sets=["training", "test", "validation"]).delete() - self.client.force_login(self.user) - self.dataset.dataset_elements.create(element_id=self.page1.id, set="training") - self.dataset.dataset_elements.create(element_id=self.page2.id, set="validation") - self.dataset.dataset_elements.create(element_id=self.page3.id, set="validation") - with self.assertNumQueries(10): - response = self.client.put( - reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), - data={ - "name": "Shin Seiki Evangelion", - "description": "Omedeto!", - # validation set is renamed to AAAAAAA - "sets": ["test", "training", "AAAAAAA"], - }, - format="json" - ) - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.dataset.refresh_from_db() - self.assertEqual(self.dataset.state, DatasetState.Open) - self.assertEqual(self.dataset.name, "Shin Seiki Evangelion") - self.assertEqual(self.dataset.description, "Omedeto!") - 
self.assertListEqual(self.dataset.sets, ["test", "training", "AAAAAAA"]) - self.assertIsNone(self.dataset.task_id) - self.assertQuerysetEqual( - self.dataset.dataset_elements.values_list("set", "element__name").order_by("element__name"), - [ - ("training", "Volume 1, page 1r"), - ("AAAAAAA", "Volume 1, page 1v"), - ("AAAAAAA", "Volume 1, page 2r"), - ] - ) - - def test_update_sets_processdataset_reference(self): - """ - If a dataset's sets are referenced by a ProcessDataset, they cannot be updated - """ + def test_update_sets_forbidden(self): self.client.force_login(self.user) - self.dataset.dataset_elements.create(element_id=self.page1.id, set="training") - self.dataset.dataset_elements.create(element_id=self.page2.id, set="validation") - self.dataset.dataset_elements.create(element_id=self.page3.id, set="validation") - with self.assertNumQueries(4): - response = self.client.put( - reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), - data={ - "name": "Shin Seiki Evangelion", - "description": "Omedeto!", - # validation set is renamed to AAAAAAA - "sets": ["test", "training", "AAAAAAA"], - }, - format="json" - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertDictEqual(response.json(), {"sets": ["These sets cannot be updated because one or more are selected in a dataset process."]}) - self.dataset.refresh_from_db() - self.assertEqual(self.dataset.state, DatasetState.Open) - self.assertEqual(self.dataset.name, "First Dataset") - self.assertEqual(self.dataset.description, "dataset number one") - self.assertListEqual(self.dataset.sets, ["training", "test", "validation"]) self.assertIsNone(self.dataset.task_id) - self.assertQuerysetEqual( - self.dataset.dataset_elements.values_list("set", "element__name").order_by("element__name"), - [ - ("training", "Volume 1, page 1r"), - ("validation", "Volume 1, page 1v"), - ("validation", "Volume 1, page 2r"), - ] - ) - - def test_update_sets_ambiguous(self): - """ - No more than one 
set can be updated - """ - # Remove ProcessDataset relation - ProcessDataset.objects.get(process=self.process, dataset=self.dataset).delete() - self.client.force_login(self.user) with self.assertNumQueries(4): response = self.client.put( reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), data={ "name": "Shin Seiki Evangelion", "description": "Omedeto!", - "sets": ["test", "AAAAAAA", "BBBBBBB"], - }, - format="json" - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json(), {"sets": ["Updating those sets is ambiguous because several have changed."]}) - - def test_update_sets_length(self): - self.client.force_login(self.user) - with self.assertNumQueries(3): - response = self.client.put( - reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), - data={ - "name": "Shin Seiki Evangelion", - "description": "Omedeto!", - "sets": [] - }, - format="json" - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertDictEqual(response.json(), {"sets": ["Either do not specify set names to use the default values, or specify a non-empty list of names."]}) - - def test_update_sets_unique_names(self): - self.client.force_login(self.user) - with self.assertNumQueries(3): - response = self.client.put( - reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), - data={ - "name": "Shin Seiki Evangelion", - "description": "Omedeto!", - "sets": ["unit-01", "unit-01", "unit-02"] + "set_names": ["unit-01", "unit-02"], }, format="json" ) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertDictEqual(response.json(), {"sets": ["Set names must be unique."]}) - - def test_update_sets_name_too_long(self): - self.client.force_login(self.user) - with self.assertNumQueries(3): - response = self.client.put( - reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), - data={ - "name": "My Dataset", - "description": "My dataset for my experiments.", - "sets": 
["unit-00", "Etiam accumsan ullamcorper mauris eget mattis. Ut porttitor."] - }, - format="json" - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertDictEqual(response.json(), { - "sets": { - "1": ["Ensure this field has no more than 50 characters."] - } - }) + self.assertDictEqual(response.json(), {"set_names": ["This API endpoint does not allow updating a dataset's sets."]}) def test_update_empty_or_blank_description_or_name(self): self.client.force_login(self.user) - with self.assertNumQueries(3): + with self.assertNumQueries(4): response = self.client.put( reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), data={ "name": "", "description": " ", - "sets": "", }, format="json" ) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) self.assertDictEqual(response.json(), { "name": ["This field may not be blank."], - "description": ["This field may not be blank."], - "sets": ['Expected a list of items but got type "str".'], + "description": ["This field may not be blank."] }) def test_update_all_errors(self): self.client.force_login(self.user) - with self.assertNumQueries(3): + with self.assertNumQueries(4): response = self.client.put( reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), data={ "name": "", "description": " ", - "sets": ["unit-00", "unit-00", "unit-02"] + "set_names": ["unit-00", "unit-00", "unit-02"] }, format="json" ) @@ -723,14 +573,14 @@ class TestDatasetsAPI(FixtureAPITestCase): self.assertDictEqual(response.json(), { "name": ["This field may not be blank."], "description": ["This field may not be blank."], - "sets": ["Set names must be unique."] + "set_names": ["This API endpoint does not allow updating a dataset's sets."] }) def test_update_state_requires_ponos_auth(self): self.client.force_login(self.user) self.dataset.state = DatasetState.Building self.dataset.save() - with self.assertNumQueries(4): + with self.assertNumQueries(5): response = self.client.put( 
reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), data={ @@ -748,7 +598,7 @@ class TestDatasetsAPI(FixtureAPITestCase): def test_update_ponos_task_state_update(self): self.dataset.state = DatasetState.Building self.dataset.save() - with self.assertNumQueries(8): + with self.assertNumQueries(6): response = self.client.put( reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), HTTP_AUTHORIZATION=f"Ponos {self.task.token}", @@ -813,13 +663,12 @@ class TestDatasetsAPI(FixtureAPITestCase): self.client.force_login(self.user) for new_state in DatasetState: with self.subTest(new_state=new_state): - with self.assertNumQueries(3): + with self.assertNumQueries(4): response = self.client.put( reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), data={ "name": "AA", "description": "BB", - "sets": self.dataset.sets + ["CC"], "state": new_state.value, }, format="json" @@ -832,7 +681,7 @@ class TestDatasetsAPI(FixtureAPITestCase): def test_update_ponos_task_state_requires_dataset_in_process(self): self.process.process_datasets.all().delete() - with self.assertNumQueries(4): + with self.assertNumQueries(5): response = self.client.put( reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), HTTP_AUTHORIZATION=f"Ponos {self.task.token}", @@ -849,7 +698,7 @@ class TestDatasetsAPI(FixtureAPITestCase): }) def test_update_ponos_task_bad_state(self): - with self.assertNumQueries(2): + with self.assertNumQueries(3): response = self.client.put( reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), HTTP_AUTHORIZATION=f"Ponos {self.task.token}", @@ -892,7 +741,7 @@ class TestDatasetsAPI(FixtureAPITestCase): @patch("arkindex.project.mixins.has_access", return_value=False) def test_partial_update_requires_write_corpus(self, has_access_mock): self.client.force_login(self.read_user) - with self.assertNumQueries(3): + with self.assertNumQueries(4): response = self.client.patch( reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), 
data={"name": "Shin Seiki Evangelion"}, @@ -906,7 +755,7 @@ class TestDatasetsAPI(FixtureAPITestCase): def test_partial_update_name_too_long(self): self.client.force_login(self.user) - with self.assertNumQueries(3): + with self.assertNumQueries(4): response = self.client.patch( reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), data={ @@ -920,7 +769,7 @@ class TestDatasetsAPI(FixtureAPITestCase): def test_partial_update_name_already_exists_in_corpus(self): Dataset.objects.create(name="Another Dataset", description="A set of data", corpus=self.corpus, creator=self.dataset_creator) self.client.force_login(self.user) - with self.assertNumQueries(4): + with self.assertNumQueries(5): response = self.client.patch( reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), data={ @@ -933,7 +782,7 @@ class TestDatasetsAPI(FixtureAPITestCase): def test_partial_update(self): self.client.force_login(self.user) - with self.assertNumQueries(8): + with self.assertNumQueries(6): response = self.client.patch( reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), data={ @@ -946,11 +795,11 @@ class TestDatasetsAPI(FixtureAPITestCase): self.assertEqual(self.dataset.state, DatasetState.Open) self.assertEqual(self.dataset.name, "First Dataset") self.assertEqual(self.dataset.description, "Omedeto!") - self.assertListEqual(self.dataset.sets, ["training", "test", "validation"]) + self.assertCountEqual(list(self.dataset.sets.values_list("name", flat=True)), ["training", "test", "validation"]) def test_partial_update_empty_or_blank_description_or_name(self): self.client.force_login(self.user) - with self.assertNumQueries(3): + with self.assertNumQueries(4): response = self.client.patch( reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), data={ @@ -964,7 +813,7 @@ class TestDatasetsAPI(FixtureAPITestCase): def test_partial_update_requires_ponos_auth(self): self.client.force_login(self.user) - with self.assertNumQueries(4): + with self.assertNumQueries(5): 
response = self.client.patch( reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), data={ @@ -978,7 +827,7 @@ class TestDatasetsAPI(FixtureAPITestCase): }) def test_partial_update_ponos_task_state_update(self): - with self.assertNumQueries(8): + with self.assertNumQueries(6): response = self.client.patch( reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), HTTP_AUTHORIZATION=f"Ponos {self.task.token}", @@ -993,7 +842,7 @@ class TestDatasetsAPI(FixtureAPITestCase): def test_partial_update_ponos_task_state_requires_dataset_in_process(self): self.process.process_datasets.all().delete() - with self.assertNumQueries(4): + with self.assertNumQueries(5): response = self.client.patch( reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), HTTP_AUTHORIZATION=f"Ponos {self.task.token}", @@ -1008,7 +857,7 @@ class TestDatasetsAPI(FixtureAPITestCase): }) def test_partial_update_ponos_task_bad_state(self): - with self.assertNumQueries(2): + with self.assertNumQueries(3): response = self.client.patch( reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), HTTP_AUTHORIZATION=f"Ponos {self.task.token}", @@ -1022,23 +871,6 @@ class TestDatasetsAPI(FixtureAPITestCase): self.dataset.refresh_from_db() self.assertEqual(self.dataset.state, DatasetState.Open) - def test_partial_update_sets_name_too_long(self): - self.client.force_login(self.user) - with self.assertNumQueries(3): - response = self.client.patch( - reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), - data={ - "sets": ["unit-00", "Etiam accumsan ullamcorper mauris eget mattis. 
Ut porttitor."] - }, - format="json" - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertDictEqual(response.json(), { - "sets": { - "1": ["Ensure this field has no more than 50 characters."] - } - }) - def test_partial_update_ponos_task_state_forbidden(self): """Dataset's state update is limited to specific transitions""" op, build, complete, error = [DatasetState[state] for state in ("Open", "Building", "Complete", "Error")] @@ -1087,13 +919,12 @@ class TestDatasetsAPI(FixtureAPITestCase): self.client.force_login(self.user) for new_state in DatasetState: with self.subTest(new_state=new_state): - with self.assertNumQueries(3): + with self.assertNumQueries(4): response = self.client.patch( reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), data={ "name": "AA", "description": "BB", - "sets": self.dataset.sets + ["CC"], "state": new_state.value, }, format="json" @@ -1160,7 +991,13 @@ class TestDatasetsAPI(FixtureAPITestCase): "name": "First Dataset", "description": "dataset number one", "state": "open", - "sets": ["training", "test", "validation"], + "sets": [ + { + "id": str(ds.id), + "name": ds.name + } + for ds in self.dataset.sets.order_by("name") + ], "set_elements": {"test": 0, "training": 0, "validation": 0}, "creator": "Test user", "task_id": None, @@ -1224,7 +1061,7 @@ class TestDatasetsAPI(FixtureAPITestCase): def test_delete(self): self.client.force_login(self.user) - with self.assertNumQueries(6): + with self.assertNumQueries(7): response = self.client.delete( reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), ) @@ -1250,14 +1087,15 @@ class TestDatasetsAPI(FixtureAPITestCase): """ DestroyDataset also deletes DatasetElements """ - self.dataset.dataset_elements.create(element_id=self.vol.id, set="test") - self.dataset.dataset_elements.create(element_id=self.vol.id, set="training") - self.dataset.dataset_elements.create(element_id=self.page1.id, set="training") - 
self.dataset.dataset_elements.create(element_id=self.page2.id, set="validation") - self.dataset.dataset_elements.create(element_id=self.page3.id, set="validation") + test_set, train_set, validation_set = self.dataset.sets.all().order_by("name") + test_set.set_elements.create(element_id=self.vol.id) + train_set.set_elements.create(element_id=self.vol.id, set="training") + train_set.set_elements.create(element_id=self.page1.id, set="training") + validation_set.set_elements.create(element_id=self.page2.id, set="validation") + validation_set.set_elements.create(element_id=self.page3.id, set="validation") self.client.force_login(self.user) - with self.assertNumQueries(6): + with self.assertNumQueries(7): response = self.client.delete( reverse("api:dataset-update", kwargs={"pk": self.dataset.pk}), ) @@ -1266,7 +1104,7 @@ class TestDatasetsAPI(FixtureAPITestCase): with self.assertRaises(Dataset.DoesNotExist): self.dataset.refresh_from_db() - self.assertFalse(self.dataset.dataset_elements.exists()) + self.assertFalse(DatasetElement.objects.filter(set__dataset_id=self.dataset.id).exists()) # No elements should have been deleted self.vol.refresh_from_db() @@ -1296,12 +1134,13 @@ class TestDatasetsAPI(FixtureAPITestCase): self.assertEqual(filter_rights_mock.call_args, call(self.user, Corpus, Role.Guest.value)) def test_list_elements_set_filter_wrong_set(self): - self.dataset.dataset_elements.create(element_id=self.page1.id, set="test") + test_set = self.dataset.sets.order_by("name").first() + test_set.set_elements.create(element_id=self.page1.id) self.client.force_login(self.user) with self.assertNumQueries(4): response = self.client.get( reverse("api:dataset-elements", kwargs={"pk": str(self.dataset.id)}), - data={"set": "aaaaa"} + data={"set": "training"} ) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertDictEqual(response.json(), { @@ -1312,10 +1151,11 @@ class TestDatasetsAPI(FixtureAPITestCase): }) def test_list_elements_set_filter(self): - 
self.dataset.dataset_elements.create(element_id=self.page1.id, set="test") - self.dataset.dataset_elements.create(element_id=self.page2.id, set="training") + test_set, train_set, _ = self.dataset.sets.all().order_by("name") + test_set.set_elements.create(element_id=self.page1.id) + train_set.set_elements.create(element_id=self.page2.id) self.client.force_login(self.user) - with self.assertNumQueries(5): + with self.assertNumQueries(6): response = self.client.get( reverse("api:dataset-elements", kwargs={"pk": self.dataset.pk}), data={"set": "training", "with_count": "true"}, @@ -1331,10 +1171,11 @@ class TestDatasetsAPI(FixtureAPITestCase): @patch("arkindex.documents.models.Element.thumbnail", MagicMock(s3_url="s3_url")) def test_list_elements(self): - self.dataset.dataset_elements.create(element_id=self.vol.id, set="test") - self.dataset.dataset_elements.create(element_id=self.page1.id, set="training") - self.dataset.dataset_elements.create(element_id=self.page2.id, set="validation") - self.dataset.dataset_elements.create(element_id=self.page3.id, set="validation") + test_set, train_set, validation_set = self.dataset.sets.all().order_by("name") + test_set.set_elements.create(element_id=self.vol.id) + train_set.set_elements.create(element_id=self.page1.id) + validation_set.set_elements.create(element_id=self.page2.id) + validation_set.set_elements.create(element_id=self.page3.id) self.page3.confidence = 0.42 self.page3.mirrored = True self.page3.rotation_angle = 42 @@ -1523,7 +1364,7 @@ class TestDatasetsAPI(FixtureAPITestCase): self.dataset.state = state self.dataset.save() with self.subTest(state=state): - with self.assertNumQueries(4): + with self.assertNumQueries(5): response = self.client.get( reverse("api:dataset-elements", kwargs={"pk": self.dataset.pk}), {"page_size": 3}, @@ -1589,7 +1430,7 @@ class TestDatasetsAPI(FixtureAPITestCase): def test_add_element_wrong_element(self): element = 
self.private_corpus.elements.create(type=self.private_corpus.types.create(slug="folder")) self.client.force_login(self.user) - with self.assertNumQueries(4): + with self.assertNumQueries(5): response = self.client.post( reverse("api:dataset-elements", kwargs={"pk": self.dataset.id}), data={"set": "test", "element_id": str(element.id)}, @@ -1602,7 +1443,7 @@ class TestDatasetsAPI(FixtureAPITestCase): def test_add_element_wrong_set(self): self.client.force_login(self.user) - with self.assertNumQueries(4): + with self.assertNumQueries(5): response = self.client.post( reverse("api:dataset-elements", kwargs={"pk": self.dataset.id}), data={"set": "aaaaaaaaaaa", "element_id": str(self.vol.id)}, @@ -1610,7 +1451,7 @@ class TestDatasetsAPI(FixtureAPITestCase): ) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) self.assertDictEqual(response.json(), { - "set": ["This dataset has no set named aaaaaaaaaaa."], + "set": ["Object with name=aaaaaaaaaaa does not exist."], }) def test_add_element_dataset_requires_open(self): @@ -1627,9 +1468,10 @@ class TestDatasetsAPI(FixtureAPITestCase): self.assertListEqual(response.json(), ["You can only add elements to a dataset in an open state."]) def test_add_element_already_exists(self): - self.dataset.dataset_elements.create(element=self.page1, set="test") + test_set = self.dataset.sets.order_by("name").first() + test_set.set_elements.create(element=self.page1) self.client.force_login(self.user) - with self.assertNumQueries(5): + with self.assertNumQueries(6): response = self.client.post( reverse("api:dataset-elements", kwargs={"pk": self.dataset.id}), data={"set": "test", "element_id": str(self.page1.id)}, @@ -1639,8 +1481,9 @@ class TestDatasetsAPI(FixtureAPITestCase): self.assertDictEqual(response.json(), {"non_field_errors": ["This element is already part of this set."]}) def test_add_element(self): + train_set = self.dataset.sets.get(name="training") self.client.force_login(self.user) - with 
self.assertNumQueries(10): + with self.assertNumQueries(11): response = self.client.post( reverse("api:dataset-elements", kwargs={"pk": self.dataset.id}), data={"set": "training", "element_id": str(self.page1.id)}, @@ -1648,10 +1491,12 @@ class TestDatasetsAPI(FixtureAPITestCase): ) self.assertEqual(response.status_code, status.HTTP_201_CREATED) self.assertQuerysetEqual( - self.dataset.dataset_elements.values_list("set", "element__name").order_by("element__name"), + train_set.set_elements.values_list("set__name", "element__name").order_by("element__name"), [("training", "Volume 1, page 1r")] ) + # CreateDatasetElementSelection + def test_add_from_selection_requires_login(self): with self.assertNumQueries(0): response = self.client.post(reverse("api:dataset-elements-selection", kwargs={"pk": self.corpus.id})) @@ -1710,8 +1555,7 @@ class TestDatasetsAPI(FixtureAPITestCase): response = self.client.post(reverse("api:dataset-elements-selection", kwargs={"pk": self.corpus.id})) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) self.assertDictEqual(response.json(), { - "dataset_id": ["This field is required."], - "set": ["This field is required."], + "set_id": ["This field is required."], }) def test_add_from_selection_wrong_values(self): @@ -1719,61 +1563,50 @@ class TestDatasetsAPI(FixtureAPITestCase): with self.assertNumQueries(3): response = self.client.post( reverse("api:dataset-elements-selection", kwargs={"pk": self.corpus.id}), - data={"set": {}, "dataset_id": "AAA"}, + data={"set_id": "AAA"}, format="json", ) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) self.assertDictEqual(response.json(), { - "dataset_id": ["“AAA†is not a valid UUID."], - "set": ["Not a valid string."], + "set_id": ["“AAA†is not a valid UUID."], }) def test_add_from_selection_wrong_dataset(self): + self.private_corpus.memberships.create(user=self.user, level=Role.Contributor.value) self.client.force_login(self.user) with self.assertNumQueries(4): 
response = self.client.post( reverse("api:dataset-elements-selection", kwargs={"pk": self.corpus.id}), - data={"set": "aaa", "dataset_id": self.private_dataset.id}, + data={"set_id": self.private_dataset_set.id}, format="json", ) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) self.assertDictEqual(response.json(), { - "dataset_id": [f"Dataset {self.private_dataset.id} is not part of corpus Unit Tests."], + "set_id": [f"Dataset {self.private_dataset.id} is not part of corpus Unit Tests."], }) def test_add_from_selection_completed_dataset(self): """A dataset in the Complete state is immutable""" + test_set = self.dataset.sets.first() self.client.force_login(self.user) self.dataset.state = DatasetState.Complete self.dataset.save() with self.assertNumQueries(4): response = self.client.post( reverse("api:dataset-elements-selection", kwargs={"pk": self.corpus.id}), - data={"set": "aaa", "dataset_id": self.dataset.id}, - format="json", - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertDictEqual(response.json(), { - "dataset_id": [f"Dataset {self.dataset.id} is marked as completed."] - }) - - def test_add_from_selection_wrong_set(self): - self.client.force_login(self.user) - with self.assertNumQueries(4): - response = self.client.post( - reverse("api:dataset-elements-selection", kwargs={"pk": self.corpus.id}), - data={"set": "aaa", "dataset_id": self.dataset.id}, + data={"set_id": test_set.id}, format="json", ) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) self.assertDictEqual(response.json(), { - "set": ["This dataset only allows one of training, test, validation."], + "set_id": [f"Dataset {self.dataset.id} is marked as completed."] }) def test_add_from_selection(self): - self.dataset.dataset_elements.create(element=self.page1, set="training") + train_set = self.dataset.sets.get(name="training") + train_set.set_elements.create(element=self.page1) self.assertQuerysetEqual( - 
self.dataset.dataset_elements.values_list("set", "element__name").order_by("element__name"), + train_set.set_elements.values_list("set__name", "element__name").order_by("element__name"), [("training", "Volume 1, page 1r")] ) self.user.selected_elements.set([self.vol, self.page1, self.page2]) @@ -1782,12 +1615,12 @@ class TestDatasetsAPI(FixtureAPITestCase): with self.assertNumQueries(6): response = self.client.post( reverse("api:dataset-elements-selection", kwargs={"pk": self.corpus.id}), - data={"set": "training", "dataset_id": self.dataset.id}, + data={"set_id": str(train_set.id)}, format="json", ) self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) self.assertQuerysetEqual( - self.dataset.dataset_elements.values_list("set", "element__name").order_by("element__name"), + train_set.set_elements.values_list("set__name", "element__name").order_by("element__name"), [ ("training", "Volume 1"), ("training", "Volume 1, page 1r"), @@ -1819,8 +1652,9 @@ class TestDatasetsAPI(FixtureAPITestCase): """ A non authenticated user can list datasets of a public element """ - self.dataset.dataset_elements.create(element=self.vol, set="train") - with self.assertNumQueries(3): + train_set = self.dataset.sets.get(name="training") + train_set.set_elements.create(element=self.vol) + with self.assertNumQueries(4): response = self.client.get(reverse("api:element-datasets", kwargs={"pk": str(self.vol.id)})) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertDictEqual(response.json(), { @@ -1833,7 +1667,13 @@ class TestDatasetsAPI(FixtureAPITestCase): "id": str(self.dataset.id), "name": "First Dataset", "description": "dataset number one", - "sets": ["training", "test", "validation"], + "sets": [ + { + "id": str(ds.id), + "name": ds.name + } + for ds in self.dataset.sets.order_by("name") + ], "set_elements": None, "state": "open", "corpus_id": str(self.corpus.id), @@ -1842,7 +1682,7 @@ class TestDatasetsAPI(FixtureAPITestCase): "created": 
self.dataset.created.isoformat().replace("+00:00", "Z"), "updated": self.dataset.updated.isoformat().replace("+00:00", "Z"), }, - "set": "train", + "set": "training", "previous": None, "next": None }] @@ -1850,10 +1690,12 @@ class TestDatasetsAPI(FixtureAPITestCase): def test_element_datasets(self): self.client.force_login(self.user) - self.dataset.dataset_elements.create(element=self.page1, set="train") - self.dataset.dataset_elements.create(element=self.page1, set="validation") - self.dataset2.dataset_elements.create(element=self.page1, set="train") - with self.assertNumQueries(5): + _, train_set, validation_set = self.dataset.sets.all().order_by("name") + train_set.set_elements.create(element=self.page1, set="train") + validation_set.set_elements.create(element=self.page1, set="validation") + train_set_2 = self.dataset2.sets.get(name="training") + train_set_2.set_elements.create(element=self.page1, set="train") + with self.assertNumQueries(6): response = self.client.get(reverse("api:element-datasets", kwargs={"pk": str(self.page1.id)})) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertDictEqual(response.json(), { @@ -1866,7 +1708,13 @@ class TestDatasetsAPI(FixtureAPITestCase): "id": str(self.dataset.id), "name": "First Dataset", "description": "dataset number one", - "sets": ["training", "test", "validation"], + "sets": [ + { + "id": str(ds.id), + "name": ds.name + } + for ds in self.dataset.sets.order_by("name") + ], "set_elements": None, "state": "open", "corpus_id": str(self.corpus.id), @@ -1875,7 +1723,7 @@ class TestDatasetsAPI(FixtureAPITestCase): "created": self.dataset.created.isoformat().replace("+00:00", "Z"), "updated": self.dataset.updated.isoformat().replace("+00:00", "Z"), }, - "set": "train", + "set": "training", "previous": None, "next": None }, { @@ -1883,7 +1731,13 @@ class TestDatasetsAPI(FixtureAPITestCase): "id": str(self.dataset.id), "name": "First Dataset", "description": "dataset number one", - "sets": ["training", 
"test", "validation"], + "sets": [ + { + "id": str(ds.id), + "name": ds.name + } + for ds in self.dataset.sets.order_by("name") + ], "set_elements": None, "state": "open", "corpus_id": str(self.corpus.id), @@ -1900,7 +1754,13 @@ class TestDatasetsAPI(FixtureAPITestCase): "id": str(self.dataset2.id), "name": "Second Dataset", "description": "dataset number two", - "sets": ["training", "test", "validation"], + "sets": [ + { + "id": str(ds.id), + "name": ds.name + } + for ds in self.dataset2.sets.order_by("name") + ], "set_elements": None, "state": "open", "corpus_id": str(self.corpus.id), @@ -1909,7 +1769,7 @@ class TestDatasetsAPI(FixtureAPITestCase): "created": self.dataset2.created.isoformat().replace("+00:00", "Z"), "updated": self.dataset2.updated.isoformat().replace("+00:00", "Z"), }, - "set": "train", + "set": "training", "previous": None, "next": None }] @@ -1917,10 +1777,12 @@ class TestDatasetsAPI(FixtureAPITestCase): def test_element_datasets_with_neighbors_false(self): self.client.force_login(self.user) - self.dataset.dataset_elements.create(element=self.page1, set="train") - self.dataset.dataset_elements.create(element=self.page1, set="validation") - self.dataset2.dataset_elements.create(element=self.page1, set="train") - with self.assertNumQueries(5): + _, train_set, validation_set = self.dataset.sets.all().order_by("name") + train_set_2 = self.dataset2.sets.get(name="training") + train_set.set_elements.create(element=self.page1) + validation_set.set_elements.create(element=self.page1) + train_set_2.set_elements.create(element=self.page1) + with self.assertNumQueries(6): response = self.client.get(reverse("api:element-datasets", kwargs={"pk": str(self.page1.id)}), {"with_neighbors": False}) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertDictEqual(response.json(), { @@ -1933,7 +1795,13 @@ class TestDatasetsAPI(FixtureAPITestCase): "id": str(self.dataset.id), "name": "First Dataset", "description": "dataset number one", - "sets": 
["training", "test", "validation"], + "sets": [ + { + "id": str(ds.id), + "name": ds.name + } + for ds in self.dataset.sets.order_by("name") + ], "set_elements": None, "state": "open", "corpus_id": str(self.corpus.id), @@ -1942,7 +1810,7 @@ class TestDatasetsAPI(FixtureAPITestCase): "created": self.dataset.created.isoformat().replace("+00:00", "Z"), "updated": self.dataset.updated.isoformat().replace("+00:00", "Z"), }, - "set": "train", + "set": "training", "previous": None, "next": None }, { @@ -1950,7 +1818,13 @@ class TestDatasetsAPI(FixtureAPITestCase): "id": str(self.dataset.id), "name": "First Dataset", "description": "dataset number one", - "sets": ["training", "test", "validation"], + "sets": [ + { + "id": str(ds.id), + "name": ds.name + } + for ds in self.dataset.sets.order_by("name") + ], "set_elements": None, "state": "open", "corpus_id": str(self.corpus.id), @@ -1967,7 +1841,13 @@ class TestDatasetsAPI(FixtureAPITestCase): "id": str(self.dataset2.id), "name": "Second Dataset", "description": "dataset number two", - "sets": ["training", "test", "validation"], + "sets": [ + { + "id": str(ds.id), + "name": ds.name + } + for ds in self.dataset2.sets.order_by("name") + ], "set_elements": None, "state": "open", "corpus_id": str(self.corpus.id), @@ -1976,7 +1856,7 @@ class TestDatasetsAPI(FixtureAPITestCase): "created": self.dataset2.created.isoformat().replace("+00:00", "Z"), "updated": self.dataset2.updated.isoformat().replace("+00:00", "Z"), }, - "set": "train", + "set": "training", "previous": None, "next": None }] @@ -1984,12 +1864,14 @@ class TestDatasetsAPI(FixtureAPITestCase): def test_element_datasets_with_neighbors(self): self.client.force_login(self.user) - self.dataset.dataset_elements.create(element=self.page1, set="train") - self.dataset.dataset_elements.create(element=self.page2, set="train") - self.dataset.dataset_elements.create(element=self.page3, set="train") - self.dataset.dataset_elements.create(element=self.page1, set="validation") - 
self.dataset2.dataset_elements.create(element=self.page1, set="train") - self.dataset2.dataset_elements.create(element=self.page3, set="train") + _, train_set, validation_set = self.dataset.sets.all().order_by("name") + train_set_2 = self.dataset2.sets.get(name="training") + train_set.set_elements.create(element=self.page1) + train_set.set_elements.create(element=self.page2) + train_set.set_elements.create(element=self.page3) + validation_set.set_elements.create(element=self.page1) + train_set_2.set_elements.create(element=self.page1) + train_set_2.set_elements.create(element=self.page3) # Results are alphabetically ordered and must not depend on the random page UUIDs sorted_dataset_elements = sorted([str(self.page1.id), str(self.page2.id), str(self.page3.id)]) @@ -1997,9 +1879,10 @@ class TestDatasetsAPI(FixtureAPITestCase): sorted_dataset2_elements = sorted([str(self.page1.id), str(self.page3.id)]) page1_index_2 = sorted_dataset2_elements.index(str(self.page1.id)) - with self.assertNumQueries(7): + with self.assertNumQueries(8): response = self.client.get(reverse("api:element-datasets", kwargs={"pk": str(self.page1.id)}), {"with_neighbors": True}) self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertDictEqual(response.json(), { "count": 3, "next": None, @@ -2010,7 +1893,13 @@ class TestDatasetsAPI(FixtureAPITestCase): "id": str(self.dataset.id), "name": "First Dataset", "description": "dataset number one", - "sets": ["training", "test", "validation"], + "sets": [ + { + "id": str(ds.id), + "name": ds.name + } + for ds in self.dataset.sets.order_by("name") + ], "set_elements": None, "state": "open", "corpus_id": str(self.corpus.id), @@ -2019,7 +1908,7 @@ class TestDatasetsAPI(FixtureAPITestCase): "created": self.dataset.created.isoformat().replace("+00:00", "Z"), "updated": self.dataset.updated.isoformat().replace("+00:00", "Z"), }, - "set": "train", + "set": "training", "previous": ( sorted_dataset_elements[page1_index_1 - 1] if page1_index_1 - 
1 >= 0 @@ -2035,7 +1924,13 @@ class TestDatasetsAPI(FixtureAPITestCase): "id": str(self.dataset.id), "name": "First Dataset", "description": "dataset number one", - "sets": ["training", "test", "validation"], + "sets": [ + { + "id": str(ds.id), + "name": ds.name + } + for ds in self.dataset.sets.order_by("name") + ], "set_elements": None, "state": "open", "corpus_id": str(self.corpus.id), @@ -2052,7 +1947,13 @@ class TestDatasetsAPI(FixtureAPITestCase): "id": str(self.dataset2.id), "name": "Second Dataset", "description": "dataset number two", - "sets": ["training", "test", "validation"], + "sets": [ + { + "id": str(ds.id), + "name": ds.name + } + for ds in self.dataset2.sets.order_by("name") + ], "set_elements": None, "state": "open", "corpus_id": str(self.corpus.id), @@ -2061,7 +1962,7 @@ class TestDatasetsAPI(FixtureAPITestCase): "created": self.dataset2.created.isoformat().replace("+00:00", "Z"), "updated": self.dataset2.updated.isoformat().replace("+00:00", "Z"), }, - "set": "train", + "set": "training", "previous": ( sorted_dataset2_elements[page1_index_2 - 1] if page1_index_2 == 1 @@ -2136,13 +2037,14 @@ class TestDatasetsAPI(FixtureAPITestCase): self.dataset.state = DatasetState.Error self.dataset.task = self.task self.dataset.save() - self.dataset.dataset_elements.create(element=self.page1, set="test") - self.dataset.dataset_elements.create(element=self.page1, set="validation") - self.dataset.dataset_elements.create(element=self.vol, set="validation") + test_set, _, validation_set = self.dataset.sets.all().order_by("name") + test_set.set_elements.create(element=self.page1) + validation_set.set_elements.create(element=self.page1) + validation_set.set_elements.create(element=self.vol) self.assertCountEqual(self.corpus.datasets.values_list("name", flat=True), ["First Dataset", "Second Dataset"]) self.client.force_login(self.user) - with self.assertNumQueries(12): + with self.assertNumQueries(14): response = self.client.post( reverse("api:dataset-clone", 
kwargs={"pk": self.dataset.id}), format="json", @@ -2155,9 +2057,11 @@ class TestDatasetsAPI(FixtureAPITestCase): ]) data = response.json() clone = self.corpus.datasets.get(id=data["id"]) + test_clone, train_clone, val_clone = clone.sets.all().order_by("name") self.assertEqual(clone.creator, self.user) data.pop("created") data.pop("updated") + cloned_sets = data.pop("sets") self.assertDictEqual( response.json(), { @@ -2166,14 +2070,27 @@ class TestDatasetsAPI(FixtureAPITestCase): "description": self.dataset.description, "creator": self.user.display_name, "corpus_id": str(self.corpus.id), - "sets": ["training", "test", "validation"], "set_elements": {"test": 1, "training": 0, "validation": 2}, "state": DatasetState.Open.value, "task_id": str(self.task.id), }, ) + self.assertCountEqual(cloned_sets, [ + { + "name": "training", + "id": str(train_clone.id) + }, + { + "name": "test", + "id": str(test_clone.id) + }, + { + "name": "validation", + "id": str(val_clone.id) + } + ]) self.assertQuerysetEqual( - clone.dataset_elements.values_list("set", "element__name").order_by("element__name", "set"), + DatasetElement.objects.filter(set__dataset_id=clone.id).values_list("set__name", "element__name").order_by("element__name", "set__name"), [ ("validation", "Volume 1"), ("test", "Volume 1, page 1r"), @@ -2184,7 +2101,7 @@ class TestDatasetsAPI(FixtureAPITestCase): def test_clone_existing_name(self): self.corpus.datasets.create(name="Clone of First Dataset", creator=self.user) self.client.force_login(self.user) - with self.assertNumQueries(11): + with self.assertNumQueries(13): response = self.client.post( reverse("api:dataset-clone", kwargs={"pk": self.dataset.id}), format="json", @@ -2198,27 +2115,43 @@ class TestDatasetsAPI(FixtureAPITestCase): ]) data = response.json() - data.pop("id") data.pop("created") data.pop("updated") + clone = Dataset.objects.get(id=data["id"]) + test_clone, train_clone, val_clone = clone.sets.all().order_by("name") + cloned_sets = data.pop("sets") 
self.assertDictEqual( response.json(), { + "id": str(clone.id), "name": "Clone of First Dataset 1", "description": self.dataset.description, "creator": self.user.display_name, "corpus_id": str(self.corpus.id), - "sets": self.dataset.sets, - "set_elements": {k: 0 for k in self.dataset.sets}, + "set_elements": {str(k.name): 0 for k in self.dataset.sets.all()}, "state": DatasetState.Open.value, "task_id": None, }, ) + self.assertCountEqual(cloned_sets, [ + { + "name": "training", + "id": str(train_clone.id) + }, + { + "name": "test", + "id": str(test_clone.id) + }, + { + "name": "validation", + "id": str(val_clone.id) + } + ]) def test_clone_name_too_long(self): dataset = self.corpus.datasets.create(name="A" * 99, creator=self.user) self.client.force_login(self.user) - with self.assertNumQueries(11): + with self.assertNumQueries(12): response = self.client.post( reverse("api:dataset-clone", kwargs={"pk": dataset.id}), format="json", @@ -2236,7 +2169,7 @@ class TestDatasetsAPI(FixtureAPITestCase): response = self.client.delete(reverse( "api:dataset-element", kwargs={"dataset": str(self.dataset.id), "element": str(self.page1.id)}) - + "?set=train" + + "?set=training" ) self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) @@ -2247,28 +2180,29 @@ class TestDatasetsAPI(FixtureAPITestCase): response = self.client.delete(reverse( "api:dataset-element", kwargs={"dataset": str(self.dataset.id), "element": str(self.page1.id)}) - + "?set=train" + + "?set=training" ) self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) @patch("arkindex.project.mixins.has_access", return_value=False) def test_destroy_dataset_element_requires_contributor(self, has_access_mock): self.client.force_login(self.read_user) - self.dataset.dataset_elements.create(element=self.page1, set="train") - self.dataset.dataset_elements.create(element=self.page1, set="validation") - self.assertEqual(self.dataset.dataset_elements.filter(set="train").count(), 1) - 
self.assertEqual(self.dataset.dataset_elements.filter(set="validation").count(), 1) + _, train_set, validation_set = self.dataset.sets.all().order_by("name") + train_set.set_elements.create(element=self.page1) + validation_set.set_elements.create(element=self.page1) + self.assertEqual(train_set.set_elements.count(), 1) + self.assertEqual(validation_set.set_elements.count(), 1) with self.assertNumQueries(3): response = self.client.delete(reverse( "api:dataset-element", kwargs={"dataset": str(self.dataset.id), "element": str(self.page1.id)}) - + "?set=train" + + "?set=training" ) self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) self.assertDictEqual(response.json(), {"detail": "You need a Contributor access to the dataset to perform this action."}) self.dataset.refresh_from_db() - self.assertEqual(self.dataset.dataset_elements.filter(set="train").count(), 1) - self.assertEqual(self.dataset.dataset_elements.filter(set="validation").count(), 1) + self.assertEqual(train_set.set_elements.count(), 1) + self.assertEqual(validation_set.set_elements.count(), 1) self.assertEqual(has_access_mock.call_count, 1) self.assertEqual(has_access_mock.call_args, call(self.read_user, self.corpus, Role.Contributor.value, skip_public=False)) @@ -2285,23 +2219,24 @@ class TestDatasetsAPI(FixtureAPITestCase): def test_destroy_dataset_element_requires_open_dataset(self): self.client.force_login(self.user) - self.dataset.dataset_elements.create(element=self.page1, set="train") - self.dataset.dataset_elements.create(element=self.page1, set="validation") + _, train_set, validation_set = self.dataset.sets.all().order_by("name") + train_set.set_elements.create(element=self.page1) + validation_set.set_elements.create(element=self.page1) self.dataset.state = DatasetState.Error self.dataset.save() - self.assertEqual(self.dataset.dataset_elements.filter(set="train").count(), 1) - self.assertEqual(self.dataset.dataset_elements.filter(set="validation").count(), 1) + 
self.assertEqual(train_set.set_elements.count(), 1) + self.assertEqual(validation_set.set_elements.count(), 1) with self.assertNumQueries(3): response = self.client.delete(reverse( "api:dataset-element", kwargs={"dataset": str(self.dataset.id), "element": str(self.page1.id)}) - + "?set=train" + + "?set=training" ) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) self.assertDictEqual(response.json(), {"dataset": ["Elements can only be removed from open Datasets."]}) self.dataset.refresh_from_db() - self.assertEqual(self.dataset.dataset_elements.filter(set="train").count(), 1) - self.assertEqual(self.dataset.dataset_elements.filter(set="validation").count(), 1) + self.assertEqual(train_set.set_elements.count(), 1) + self.assertEqual(validation_set.set_elements.count(), 1) def test_destroy_dataset_element_dataset_doesnt_exist(self): self.client.force_login(self.user) @@ -2309,7 +2244,7 @@ class TestDatasetsAPI(FixtureAPITestCase): response = self.client.delete(reverse( "api:dataset-element", kwargs={"dataset": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", "element": str(self.page1.id)}) - + "?set=train" + + "?set=training" ) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertDictEqual(response.json(), {"detail": "Not found."}) @@ -2331,49 +2266,52 @@ class TestDatasetsAPI(FixtureAPITestCase): response = self.client.delete(reverse( "api:dataset-element", kwargs={"dataset": str(self.dataset.id), "element": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"}) - + "?set=train" + + "?set=training" ) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertDictEqual(response.json(), {"detail": "Not found."}) def test_destroy_dataset_element_element_not_in_dataset(self): - self.dataset.dataset_elements.create(element=self.page1, set="train") + train_set = self.dataset.sets.get(name="training") + train_set.set_elements.create(element=self.page1) self.client.force_login(self.user) with self.assertNumQueries(3):
response = self.client.delete(reverse( "api:dataset-element", kwargs={"dataset": str(self.dataset.id), "element": str(self.page2.id)}) - + "?set=train" + + "?set=training" ) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertDictEqual(response.json(), {"detail": "Not found."}) def test_destroy_dataset_element_wrong_set(self): - self.dataset.dataset_elements.create(element=self.page1, set="train") - self.dataset.dataset_elements.create(element=self.page2, set="validation") + _, train_set, validation_set = self.dataset.sets.all().order_by("name") + train_set.set_elements.create(element=self.page1) + validation_set.set_elements.create(element=self.page2) self.client.force_login(self.user) with self.assertNumQueries(3): response = self.client.delete(reverse( "api:dataset-element", kwargs={"dataset": str(self.dataset.id), "element": str(self.page2.id)}) - + "?set=train" + + "?set=training" ) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertDictEqual(response.json(), {"detail": "Not found."}) def test_destroy_dataset_element(self): self.client.force_login(self.user) - self.dataset.dataset_elements.create(element=self.page1, set="train") - self.dataset.dataset_elements.create(element=self.page1, set="validation") - self.assertEqual(self.dataset.dataset_elements.filter(set="train").count(), 1) - self.assertEqual(self.dataset.dataset_elements.filter(set="validation").count(), 1) + _, train_set, validation_set = self.dataset.sets.all().order_by("name") + train_set.set_elements.create(element=self.page1) + validation_set.set_elements.create(element=self.page1) + self.assertEqual(train_set.set_elements.count(), 1) + self.assertEqual(validation_set.set_elements.count(), 1) with self.assertNumQueries(4): response = self.client.delete(reverse( "api:dataset-element", kwargs={"dataset": str(self.dataset.id), "element": str(self.page1.id)}) - + "?set=train" + + "?set=training" ) self.assertEqual(response.status_code,
status.HTTP_204_NO_CONTENT) self.dataset.refresh_from_db() - self.assertEqual(self.dataset.dataset_elements.filter(set="train").count(), 0) - self.assertEqual(self.dataset.dataset_elements.filter(set="validation").count(), 1) + self.assertEqual(train_set.set_elements.count(), 0) + self.assertEqual(validation_set.set_elements.count(), 1)