diff --git a/arkindex/documents/export/structure.sql b/arkindex/documents/export/structure.sql index fe1849707eb93aa65cbf75be974fc29575ee8299..add6b7a306e2fa7ff080d0fb77d9776a454f3fe7 100644 --- a/arkindex/documents/export/structure.sql +++ b/arkindex/documents/export/structure.sql @@ -16,6 +16,7 @@ CREATE TABLE worker_version ( slug VARCHAR(100) NOT NULL, type VARCHAR(50) NOT NULL, revision VARCHAR(50) NOT NULL, + repository_url TEXT NOT NULL, PRIMARY KEY (id) ); diff --git a/arkindex/documents/export/worker_version.sql b/arkindex/documents/export/worker_version.sql index 6f99f8744caa5c928ee215717e65696ca0cb4b7e..509b9417c91c4dd49d916efa2afb344d07f95f6f 100644 --- a/arkindex/documents/export/worker_version.sql +++ b/arkindex/documents/export/worker_version.sql @@ -3,9 +3,10 @@ -- fills up the RAM. Adding DISTINCT to all the SELECT queries of the UNION -- slows this query down by ~20%. Using multiple INs instead of a UNION makes -- this query twice as slow. -SELECT version.id, worker.name, worker.slug, worker.type, revision.hash +SELECT version.id, worker.name, worker.slug, worker.type, revision.hash, repository.url FROM dataimport_workerversion version INNER JOIN dataimport_worker worker ON (version.worker_id = worker.id) +INNER JOIN dataimport_repository repository ON (worker.repository_id = repository.id) INNER JOIN dataimport_revision revision ON (version.revision_id = revision.id) WHERE version.id IN ( SELECT worker_version_id FROM documents_element WHERE corpus_id = '{corpus_id}'::uuid diff --git a/arkindex/documents/tests/tasks/test_export.py b/arkindex/documents/tests/tasks/test_export.py index 59efd89e14af120fa1947adda3a9eec443ad52a6..f03fb05e51c7ca688acc50eebe5f52a54aa39ebc 100644 --- a/arkindex/documents/tests/tasks/test_export.py +++ b/arkindex/documents/tests/tasks/test_export.py @@ -137,21 +137,23 @@ class TestExport(FixtureTestCase): ) self.assertCountEqual( - db.execute('SELECT id, name, slug, type, revision FROM worker_version').fetchall(), + db.execute('SELECT id, name, slug, type, revision, repository_url FROM worker_version').fetchall(), [ ( str(version.id), version.worker.name, version.worker.slug, version.worker.type, - version.revision.hash + version.revision.hash, + version.worker.repository.url ), ( str(metadata_version.id), metadata_version.worker.name, metadata_version.worker.slug, metadata_version.worker.type, - metadata_version.revision.hash + metadata_version.revision.hash, + metadata_version.worker.repository.url ) ] )