Skip to content
Snippets Groups Projects
Commit c7ab06d4, authored by Erwan Rouchet and committed by Bastien Abadie
Browse files

Path reordering command

parent 804fef3f
No related branches found
No related tags found
1 merge request: !1982 "Path reordering command"
#!/usr/bin/env python3
from pathlib import Path
from uuid import UUID
from django.core.management.base import BaseCommand, CommandError
from django.db import connections
from teklia_toolbox.time import Timer
from arkindex.documents.models import Corpus
from arkindex.project.argparse import CorpusArgument
class Command(BaseCommand):
    """Recompute the ``ordering`` column of element paths, one corpus at a time.

    For each selected corpus, the command resets the ordering of paths that
    have no parent to zero, then renumbers the children of every parent.
    Corpora that were processed successfully are appended to a text file so
    that an interrupted run can be resumed without redoing them.
    """

    help = 'Recompute the orderings of all ElementPaths.'

    def add_arguments(self, parser):
        """Register the corpus selection flags and the resume file option.

        Exactly one of ``--corpus`` or ``--all`` is required; ``--corpora-list``
        is optional and defaults to a file in ``/tmp``.
        """
        super().add_arguments(parser)

        group = parser.add_mutually_exclusive_group(required=True)
        group.add_argument(
            '-c',
            '--corpus',
            help='ID of a single corpus to reorder paths on.',
            type=CorpusArgument(),
        )
        group.add_argument(
            '-a',
            '--all',
            help='Reorder every ElementPath in every corpus.',
            action='store_true',
        )

        parser.add_argument(
            '-l',
            '--corpora-list',
            help='File listing the UUIDs of corpora that have already been reordered and should be skipped.\n'
                 'The UUIDs of each corpus that this command will complete will be appended to the file.\n'
                 'This allows resuming an interrupted execution.',
            type=Path,
            default='/tmp/reorder_paths.txt',
        )

    @staticmethod
    def _read_completed_ids(corpora_list):
        """Return the corpus UUIDs listed in the resume file, or ``[]`` if it does not exist.

        Blank lines are ignored. A line that is not a valid UUID raises a
        ``CommandError`` naming the file and the line, instead of letting a
        bare ``ValueError`` traceback escape.
        """
        if not corpora_list.exists():
            return []

        completed = []
        for line_number, line in enumerate(corpora_list.read_text().splitlines(), start=1):
            value = line.strip()
            if not value:
                continue
            try:
                completed.append(UUID(value))
            except ValueError as e:
                raise CommandError(
                    f'Invalid corpus UUID on line {line_number} of {corpora_list}: {value!r}'
                ) from e
        return completed

    # `all` shadows the builtin, but the name is imposed by the `--all` option
    # since options are passed to handle() as keyword arguments.
    def handle(self, all=False, corpus=None, corpora_list=None, **options):
        """Reorder the selected corpora, skipping those listed in the resume file.

        :param all: Process every corpus.
        :param corpus: Single corpus to process when ``all`` is not set.
        :param corpora_list: Path (``Path`` or ``str``) of the resume file.
            ``None`` disables both skipping and recording.
        :raises CommandError: When neither ``all`` nor ``corpus`` is set,
            or when the resume file holds an invalid UUID.
        """
        if all:
            to_reorder = Corpus.objects.all()
        elif corpus is not None:
            to_reorder = Corpus.objects.filter(id=corpus.id)
        else:
            raise CommandError('Either --all or --corpus must be set.')

        if corpora_list is not None:
            # The value may be a plain string when the command is not invoked
            # through the argument parser, so normalize it before use.
            corpora_list = Path(corpora_list)
            completed_ids = self._read_completed_ids(corpora_list)
            if completed_ids:
                to_reorder = to_reorder.exclude(id__in=completed_ids)

        total = to_reorder.count()
        if not total:
            self.stdout.write(self.style.WARNING('Nothing to reorder.'))
            return

        self.stdout.write(f'Reordering element paths in {total} corpora.')
        for current_corpus in to_reorder:
            self.reorder(current_corpus)
            # Record the corpus only once it has been fully processed, so an
            # interrupted corpus is retried on the next run.
            if corpora_list is not None:
                with corpora_list.open('a') as f:
                    f.write(f'{current_corpus.id}\n')

    def reorder(self, corpus):
        """Recompute the orderings of every element path in ``corpus`` using raw SQL.

        Runs on the ``default`` database connection: one UPDATE for paths
        without a parent, then a temporary table holding the new orderings,
        an UPDATE applying them, and a DROP of the temporary table.
        """
        self.stdout.write(f'Reordering element paths in {corpus.name} ({corpus.id})…')

        with connections['default'].cursor() as cursor, Timer() as t:
            # We do not use orderings on top-level elements, so their orderings will always be set to 0.
            # A path without a parent is detected by its last item being NULL.
            self.stdout.write('Setting top-level orderings to zero…')
            cursor.execute("""
                UPDATE documents_elementpath
                SET ordering = 0
                FROM documents_element element
                WHERE element_id = element.id
                AND element.corpus_id = %s
                AND path[array_length(path, 1)] IS NULL
                AND ordering <> 0
            """, [corpus.id])
            self.stdout.write(f'Set top-level orderings on {cursor.rowcount} element paths in {corpus.name}.')

            # An element can have several paths going through the same parent;
            # keep one (element, parent) link each, then number the children of
            # every parent by current ordering, name and ID.
            # The ORDER BY makes DISTINCT ON deterministic: without it, which
            # duplicate row (and therefore which ordering) is kept is unpredictable.
            # NOTE(review): ROW_NUMBER() starts at 1, so children are numbered
            # from 1 while top-level paths use 0 — confirm this is intended.
            self.stdout.write('Rebuilding orderings for other paths…')
            cursor.execute("""
                CREATE TEMPORARY TABLE new_orderings AS
                SELECT
                    element_id,
                    parent_id,
                    ROW_NUMBER() OVER (
                        PARTITION BY parent_id
                        ORDER BY ordering, name, element_id
                    ) AS ordering
                FROM (
                    SELECT
                        DISTINCT ON (element_id, path[array_length(path, 1)])
                        element_id,
                        element.name,
                        path[array_length(path, 1)] AS parent_id,
                        ordering
                    FROM documents_elementpath elementpath
                    INNER JOIN documents_element element
                        ON element.id = elementpath.element_id
                    WHERE element.corpus_id = %s
                    AND path[array_length(path, 1)] IS NOT NULL
                    ORDER BY element_id, path[array_length(path, 1)], ordering
                ) unique_links
            """, [corpus.id])

            # Only touch rows whose ordering actually changes, so that the
            # reported row count reflects real modifications.
            self.stdout.write('Saving new orderings…')
            cursor.execute("""
                UPDATE documents_elementpath elementpath
                SET ordering = new_orderings.ordering
                FROM new_orderings
                WHERE elementpath.element_id = new_orderings.element_id
                AND path[array_length(path, 1)] = new_orderings.parent_id
                AND elementpath.ordering <> new_orderings.ordering
            """)
            # Read the count before the DROP statement overwrites cursor.rowcount.
            path_count = cursor.rowcount

            cursor.execute("DROP TABLE new_orderings")

        # Reported after the block on the assumption that Timer only sets
        # `delta` when its context exits — TODO confirm against teklia_toolbox.
        self.stdout.write(self.style.SUCCESS(f'Reordered {path_count} element paths in {corpus.name} in {t.delta}.'))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment