Skip to content
Snippets Groups Projects
Commit 6096eedf authored by Bastien Abadie's avatar Bastien Abadie
Browse files

WIP folios

parent 86c08f65
No related branches found
No related tags found
1 merge request: !1 "WIP Documents + Folio"
certifi==2017.7.27.1
chardet==3.0.4
Django==1.11.6
djangorestframework==3.7.1
django-webpack-loader==0.5.0
djangorestframework==3.7.1
elasticsearch==5.4.0
et-xmlfile==1.0.1
idna==2.6
jdcal==1.3
olefile==0.44
openpyxl==2.4.9
Pillow==4.3.0
psycopg2==2.7.3.2
pytz==2017.2
......
from django.core.management.base import BaseCommand, CommandError
from images.models import ImageServer, Zone
from openpyxl import load_workbook
from documents.models import Transcription, TRANSCRIPTION_ZONE
import roman
import gzip
import os
import re
import logging
# Configure root logging once for this command: INFO and above, with each
# record prefixed by its level name.
logging.basicConfig(format='[%(levelname)s] %(message)s', level=logging.INFO)
logger = logging.getLogger(__name__)

# Folio written with arabic digits, optionally followed by "r" or "v"
# (presumably recto / verso - confirm with the index authors).
# Only anchored at the start: any trailing text is ignored by the match.
REGEX_FOLIO_PAGE = re.compile(r'^(\d+)(r|v)?')

# Folio written with roman numerals limited to X, V and I, optionally
# zero-padded on the left and optionally followed by "r" or "v".
REGEX_FOLIO_ROMAN = re.compile(r'^0*([XVI]+)(r|v)?')
class Command(BaseCommand):
    """
    Management command reading folio indexes from Excel (xlsx) workbooks.

    Work in progress: every row of each workbook is read and its folio
    cell is parsed, but the parsed result is not persisted yet.
    """
    help = 'Import documents into backend'

    # Folio labels that are neither arabic nor roman page numbers.
    # Cell values are compared lower-cased against this set.
    SPECIAL_FOLIOS = frozenset((
        'contre-plat',
        'contre-garde',
        'contre-plat inférieur',
        'contre-plat supérieur',
        'plat inférieur',
        'plat supérieur',
        'garde supérieur',
        'contre-garde supérieur',
        'copie insérée',
        'non folioté',
        'garde',
        'plat',
    ))

    def add_arguments(self, parser):
        """
        Declare the command line interface: one or more index files, the
        mandatory IIIF server and an optional images path prefix.
        """
        parser.add_argument(
            'files',
            nargs='+',
            help='Indexes source files'
        )
        parser.add_argument(
            '--iiif-server',
            type=str,
            help='IIIF server where to find images',
            required=True,
        )
        parser.add_argument(
            '--iiif-prefix',
            type=str,
            help='IIIF optional folder prefixing images path',
            required=False,
        )

    def handle(self, *args, **options):
        """
        Resolve the image server, then import every given file in order.

        The server is looked up by primary key when the option is made of
        digits only, by name otherwise.
        Raises CommandError when the server cannot be loaded; any error
        raised while importing a file is logged and re-raised, which stops
        the command at the first failing file.
        """
        # Load the server
        try:
            server_id = options['iiif_server']
            if server_id.isdigit():
                server = ImageServer.objects.get(pk=server_id)
            else:
                server = ImageServer.objects.get(name=server_id)
        except Exception as e:
            raise CommandError("Image server not found: {}".format(e))
        logger.info('Using Server: {}'.format(server))

        # Import the files
        imported = 0
        prefix = options['iiif_prefix'] or ''
        for path in options['files']:
            try:
                self.import_documents(server, prefix, path)
            except Exception as e:
                logger.error(e)
                raise
            imported += 1

        # Only reached when every file was imported without error
        logger.info('Imported {}/{} documents'.format(
            imported,
            len(options['files']),
        ))

    def import_documents(self, server, prefix, path, extension='jpg'):
        """
        Import documents from file
        Supports Excel 2010 files (xlsx)

        Each row of the active sheet must hold exactly five cells:
        register, filename, old filename, folio and notes.
        `server`, `prefix` and `extension` are accepted but not used yet.
        Raises CommandError when `path` does not exist or is not an xlsx
        file, and ValueError when a row does not hold five cells.
        """
        # Explicit checks instead of assert: assertions are stripped when
        # Python runs with -O, and these validate user supplied input.
        if not os.path.exists(path):
            raise CommandError('File not found: {}'.format(path))
        if not path.endswith('.xlsx'):
            raise CommandError(
                'Unsupported file, expected an xlsx file: {}'.format(path)
            )

        # Load excel file
        wb = load_workbook(filename=path, read_only=True)
        ws = wb.active
        for line in ws.rows:
            register, filename, old_filename, folio, notes = [
                cell.value for cell in line
            ]
            # TODO: store the parsed folio, it is only parsed for now
            self.parse_folio(folio)

    def parse_folio(self, folio):
        """
        Parse folio to build a DB representation

        Returns one of:
        - ('standard', page, position) for a folio starting with digits,
        - ('roman', page, position) for a folio starting with roman
          numerals, where page is converted to an int,
        - ('special', folio) for one of the SPECIAL_FOLIOS labels,
        - None for an empty cell or an unsupported value.
        `position` is 'r', 'v' or None.
        """
        if folio is None:
            return None

        # Numeric cells come back from openpyxl as int or float, while the
        # regular expressions below only accept text.
        if not isinstance(folio, str):
            folio = str(folio)

        # Standard page
        page_match = REGEX_FOLIO_PAGE.match(folio)
        if page_match is not None:
            page, position = page_match.groups()
            return ('standard', int(page), position)

        # Roman page
        roman_match = REGEX_FOLIO_ROMAN.match(folio)
        if roman_match is not None:
            page, position = roman_match.groups()
            try:
                page = roman.fromRoman(page)
            except roman.InvalidRomanNumeralError:
                # Right letters, invalid sequence (e.g. "IIII"): report it
                # instead of aborting the whole import.
                logger.warning('Invalid roman folio: %r', folio)
                return None
            return ('roman', page, position)

        # Words
        if folio.lower() in self.SPECIAL_FOLIOS:
            return ('special', folio)

        logger.warning('Unsupported folio: %r', folio)
        return None
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment