Skip to content
Snippets Groups Projects
Commit bf41be4e authored by Bastien Abadie's avatar Bastien Abadie
Browse files

Small enhancements on importer

parent 1cded0fd
No related branches found
No related tags found
No related merge requests found
......@@ -108,6 +108,7 @@ class ManifestsImporter(ABC):
"""
Parses JSON manifests and annotation data to import them in the database.
"""
offline = False
def __init__(self, imgserv):
"""Initialize a manifest importer
......@@ -117,12 +118,15 @@ class ManifestsImporter(ABC):
else:
assert all(isinstance(srv, ImageServer) for srv in imgserv)
self.imgserv = imgserv
# This dictionary associates canvas IDs with images and pages
# Filled by parse_manifest ; used by parse_annotation_list
self.canvas_image = {}
# This dictionary holds parsed data for each image's zones and transcriptions
# Filled by parse_annotation_list ; used by save_transcriptions
self.images_transcription_data = {}
# Image server statistics - hold image counts for each server
self.server_images_count = {}
......@@ -134,11 +138,12 @@ class ManifestsImporter(ABC):
"""
pass
def run(self):
def run(self, offline=False):
"""Start the importing process."""
self.offline = offline
for f in self.get_json_files():
self.parse_file(f)
self.save_transcriptions()
self.save_transcriptions()
for srv, count in self.server_images_count.items():
logger.info("Saved {0} images for server {1} ({2})".format(
count, srv.name, srv.url))
......@@ -216,16 +221,25 @@ class ManifestsImporter(ABC):
path = service_id[len(image_server.url):].lstrip('/')
# Create image and page
logger.debug("Creating image {}".format(path))
image = image_server.find_image(path)
logger.debug("Creating page {}".format(folio))
page = import_page(volume, image, volume.name, folio, i)
try:
logger.debug("Looking for image {}".format(path))
image = image_server.find_image(path, offline=self.offline)
logger.debug("Found image {}".format(image.id))
logger.debug("Looking for page {}".format(folio))
page = import_page(volume, image, volume.name, folio, i)
logger.debug("Imported page {}".format(page.id))
except Exception as e:
logger.error('Failed to import page: {}'.format(e))
return
# Fill canvas_image dictionary for usage by annotation list parsing
self.canvas_image[canvas.get('@id')] = (image, page)
# Fill image server statistics
self.server_images_count[image_server] = self.server_images_count.get(image_server, 0) + 1
logger.info('Added page {} - image {}'.format(page, image))
def parse_annotation_list(self, stream):
"""Parse a IIIF annotation list loaded as a stream.
Zones and transcriptions are not immediately saved; use save_transcriptions()."""
......@@ -282,6 +296,9 @@ class ManifestsImporter(ABC):
logger.info("Saved {0} zones, {1} transcriptions and {2} indexes".format(
total_zones, total_transcriptions, total_indexes))
# Reset
self.images_transcription_data = {}
def find_image_server(self, image_url):
"""Find an image server for a specific image.
If server is not found, will ask the user; if user refuses to create a server, will return None."""
......@@ -335,13 +352,21 @@ class ManifestsImporter(ABC):
class LocalManifestsImporter(ManifestsImporter):
"""Allows importing of local JSON files."""
def __init__(self, imgserv, folder):
def __init__(self, imgserv, path):
super().__init__(imgserv)
assert os.path.isdir(folder), 'Invalid folder path {}'.format(folder)
self.folder = folder
self.path = path
def get_json_files(self):
for root, _, filenames in os.walk(self.folder):
for filename in fnmatch.filter(filenames, "*.json"):
logger.debug("Opening {}".format(filename))
yield open(os.path.join(root, filename), 'rb')
# Support single file & directories
if os.path.isdir(self.path):
paths = [
os.path.join(root, filename)
for root, _, filenames in os.walk(self.path)
for filename in fnmatch.filter(filenames, "*.json")
]
else:
paths = [os.path.realpath(self.path), ]
for path in paths:
logger.info("Opening {}".format(path))
yield open(path, 'rb')
......@@ -5,7 +5,7 @@ from documents.importer import LocalManifestsImporter
import logging
logging.basicConfig(
level=logging.DEBUG,
level=logging.INFO,
format='[%(levelname)s] %(message)s',
)
......@@ -25,8 +25,20 @@ class Command(BaseCommand):
help='IIIF server where to find images',
nargs='+'
)
parser.add_argument(
'--offline',
action='store_true',
default=False,
help='Allow importer to make network queries',
)
def handle(self, *args, **options):
# Handle verbosity level
verbosity = int(options['verbosity'])
root_logger = logging.getLogger('')
if verbosity > 1:
root_logger.setLevel(logging.DEBUG)
# Load the server
try:
server_ids = options['iiif_server'] or []
......@@ -34,4 +46,4 @@ class Command(BaseCommand):
except Exception as e:
raise CommandError("Image server not found: {}".format(e))
LocalManifestsImporter(servers, options['manifest_folder']).run()
LocalManifestsImporter(servers, options['manifest_folder']).run(offline=options['offline'])
......@@ -24,7 +24,7 @@ class ImageServer(models.Model):
def __str__(self):
return self.name
def find_image(self, path):
def find_image(self, path, offline=False):
"""
Lookup an image on server
This is the preferred way to construct an image
......@@ -45,6 +45,11 @@ class ImageServer(models.Model):
except Image.DoesNotExist:
pass
# Support offline queries
if offline:
# TODO: create dumb image here
return img
# Check the source
if not path.endswith('/'):
path += '/'
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment