Skip to content
Snippets Groups Projects
Commit f4cb4f7a authored by Bastien Abadie
Browse files

Merge branch 'export-indexes' into 'master'

Add indexes to corpus export

Closes #828

See merge request !1461
parents 87bd6dd1 e63348c8
No related branches found
No related tags found
1 merge request: !1461 "Add indexes to corpus export"
......@@ -120,12 +120,17 @@ def export_corpus(corpus_export: CorpusExport) -> None:
logger.info(f"Running query {i+1}/{len(EXPORT_QUERIES)} {name}")
if rq_job:
rq_job.set_progress(i / len(EXPORT_QUERIES))
rq_job.set_progress(i / (len(EXPORT_QUERIES) + 1))
for chunk in run_pg_query(query.format(corpus_id=corpus_export.corpus_id)):
save_sqlite(chunk, name, cursor)
db.commit()
logger.info('Creating indexes')
if rq_job:
rq_job.set_progress(len(EXPORT_QUERIES) / (len(EXPORT_QUERIES) + 1))
cursor.executescript((BASE_DIR / 'indexes.sql').read_text())
db.close()
# Give a nice feedback on the upload portion of the export using boto3's upload callback
......
-- Secondary indexes for the SQLite corpus export.
-- Every index is named <table>_<column> and covers a single reference column
-- (a *_id column pointing at another table, or worker_version_id).
-- NOTE(review): the export test expects exactly one such index per declared
-- foreign key, so a new foreign key in the schema presumably needs a matching
-- line here; confirm against the table structure definition.
CREATE INDEX image_server_id ON image (server_id);
CREATE INDEX element_image_id ON element (image_id);
CREATE INDEX element_worker_version_id ON element (worker_version_id);
CREATE INDEX element_path_parent_id ON element_path (parent_id);
CREATE INDEX element_path_child_id ON element_path (child_id);
CREATE INDEX transcription_element_id ON transcription (element_id);
CREATE INDEX transcription_worker_version_id ON transcription (worker_version_id);
CREATE INDEX classification_element_id ON classification (element_id);
CREATE INDEX classification_worker_version_id ON classification (worker_version_id);
CREATE INDEX entity_worker_version_id ON entity (worker_version_id);
CREATE INDEX transcription_entity_transcription_id ON transcription_entity (transcription_id);
CREATE INDEX transcription_entity_entity_id ON transcription_entity (entity_id);
CREATE INDEX transcription_entity_worker_version_id ON transcription_entity (worker_version_id);
CREATE INDEX entity_link_parent_id ON entity_link (parent_id);
CREATE INDEX entity_link_child_id ON entity_link (child_id);
CREATE INDEX entity_link_role_id ON entity_link (role_id);
CREATE INDEX metadata_element_id ON metadata (element_id);
CREATE INDEX metadata_entity_id ON metadata (entity_id);
CREATE INDEX metadata_worker_version_id ON metadata (worker_version_id);
......@@ -26,6 +26,21 @@ from arkindex.images.models import Image, ImageServer
from arkindex.project.tests import FixtureTestCase
from ponos.models import Artifact
# Names of every table the exported SQLite database is expected to contain.
# The export test compares this set against the tables listed in sqlite_master
# and iterates over it to look up each table's foreign keys.
TABLE_NAMES = {
'classification',
'element',
'element_path',
'entity',
'entity_link',
'entity_role',
'image',
'image_server',
'metadata',
'transcription',
'transcription_entity',
'worker_version',
}
class TestExport(FixtureTestCase):
......@@ -123,6 +138,11 @@ class TestExport(FixtureTestCase):
db = sqlite3.connect(db_path)
self.assertCountEqual(
db.execute("SELECT name FROM sqlite_master WHERE type = 'table'").fetchall(),
[(name, ) for name in TABLE_NAMES],
)
self.assertCountEqual(
db.execute('SELECT id, display_name, url, max_width, max_height FROM image_server').fetchall(),
[
......@@ -332,6 +352,19 @@ class TestExport(FixtureTestCase):
]
)
expected_indexes = [
# foreign_key columns: id, seq, table, from, to, on_update, on_delete, match
# Cannot use a sqlite3.Row here because `foreign_key.from` would be a SyntaxError in Python
(table_name, f'{table_name}_{foreign_key[3]}')
for table_name in TABLE_NAMES
for foreign_key in db.execute(f'PRAGMA foreign_key_list({table_name})').fetchall()
]
self.assertCountEqual(
db.execute("SELECT tbl_name, name FROM sqlite_master WHERE type = 'index' AND name NOT LIKE 'sqlite\\_autoindex\\_%' ESCAPE '\\'").fetchall(),
expected_indexes
)
os.unlink(db_path)
# Download URL is sent by email
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment