Skip to content
Snippets Groups Projects
Commit e63348c8 authored by Erwan Rouchet, committed by Bastien Abadie
Browse files

Add indexes to corpus export

parent 87bd6dd1
No related branches found
No related tags found
1 merge request: !1461 Add indexes to corpus export
......@@ -120,12 +120,17 @@ def export_corpus(corpus_export: CorpusExport) -> None:
logger.info(f"Running query {i+1}/{len(EXPORT_QUERIES)} {name}")
if rq_job:
rq_job.set_progress(i / len(EXPORT_QUERIES))
rq_job.set_progress(i / (len(EXPORT_QUERIES) + 1))
for chunk in run_pg_query(query.format(corpus_id=corpus_export.corpus_id)):
save_sqlite(chunk, name, cursor)
db.commit()
logger.info('Creating indexes')
if rq_job:
rq_job.set_progress(len(EXPORT_QUERIES) / (len(EXPORT_QUERIES) + 1))
cursor.executescript((BASE_DIR / 'indexes.sql').read_text())
db.close()
# Give a nice feedback on the upload portion of the export using boto3's upload callback
......
-- Secondary indexes for the exported tables, one single-column index per statement.
-- Every index is named <table>_<column>.
-- NOTE(review): presumably this is the indexes.sql script that export_corpus runs
-- through cursor.executescript() once all table data has been written — confirm.
CREATE INDEX image_server_id ON image (server_id);
CREATE INDEX element_image_id ON element (image_id);
CREATE INDEX element_worker_version_id ON element (worker_version_id);
CREATE INDEX element_path_parent_id ON element_path (parent_id);
CREATE INDEX element_path_child_id ON element_path (child_id);
CREATE INDEX transcription_element_id ON transcription (element_id);
CREATE INDEX transcription_worker_version_id ON transcription (worker_version_id);
CREATE INDEX classification_element_id ON classification (element_id);
CREATE INDEX classification_worker_version_id ON classification (worker_version_id);
CREATE INDEX entity_worker_version_id ON entity (worker_version_id);
CREATE INDEX transcription_entity_transcription_id ON transcription_entity (transcription_id);
CREATE INDEX transcription_entity_entity_id ON transcription_entity (entity_id);
CREATE INDEX transcription_entity_worker_version_id ON transcription_entity (worker_version_id);
CREATE INDEX entity_link_parent_id ON entity_link (parent_id);
CREATE INDEX entity_link_child_id ON entity_link (child_id);
CREATE INDEX entity_link_role_id ON entity_link (role_id);
CREATE INDEX metadata_element_id ON metadata (element_id);
CREATE INDEX metadata_entity_id ON metadata (entity_id);
CREATE INDEX metadata_worker_version_id ON metadata (worker_version_id);
......@@ -26,6 +26,21 @@ from arkindex.images.models import Image, ImageServer
from arkindex.project.tests import FixtureTestCase
from ponos.models import Artifact
# Names of every table the exported SQLite database is expected to contain.
# The export test compares this set against sqlite_master and iterates it to
# look up each table's foreign keys.
TABLE_NAMES = {
    'classification',
    'element', 'element_path',
    'entity', 'entity_link', 'entity_role',
    'image', 'image_server',
    'metadata',
    'transcription', 'transcription_entity',
    'worker_version',
}
class TestExport(FixtureTestCase):
......@@ -123,6 +138,11 @@ class TestExport(FixtureTestCase):
db = sqlite3.connect(db_path)
self.assertCountEqual(
db.execute("SELECT name FROM sqlite_master WHERE type = 'table'").fetchall(),
[(name, ) for name in TABLE_NAMES],
)
self.assertCountEqual(
db.execute('SELECT id, display_name, url, max_width, max_height FROM image_server').fetchall(),
[
......@@ -332,6 +352,19 @@ class TestExport(FixtureTestCase):
]
)
expected_indexes = [
# foreign_key columns: id, seq, table, from, to, on_update, on_delete, match
# Cannot use a sqlite3.Row here because `foreign_key.from` would be a SyntaxError in Python
(table_name, f'{table_name}_{foreign_key[3]}')
for table_name in TABLE_NAMES
for foreign_key in db.execute(f'PRAGMA foreign_key_list({table_name})').fetchall()
]
self.assertCountEqual(
db.execute("SELECT tbl_name, name FROM sqlite_master WHERE type = 'index' AND name NOT LIKE 'sqlite\\_autoindex\\_%' ESCAPE '\\'").fetchall(),
expected_indexes
)
os.unlink(db_path)
# Download URL is sent by email
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment