Skip to content
Snippets Groups Projects
Commit 974a425a authored by Erwan Rouchet
Browse files

Nits

parent 487798dd
No related branches found
No related tags found
1 merge request: !22 "Add score to transcriptions"
......@@ -109,7 +109,7 @@ class ManifestsImporter(ABC):
Parses JSON manifests and annotation data to import them in the database.
"""
def __init__(self, imgserv, offline=False, no_annotations=False):
def __init__(self, imgserv, offline=False, annotations=True):
"""Initialize a manifest importer
`imgserv` can be either one ImageServer or a list of ImageServers."""
if isinstance(imgserv, ImageServer):
......@@ -119,7 +119,7 @@ class ManifestsImporter(ABC):
self.imgserv = imgserv
self.offline = offline
self.no_annotations = no_annotations
self.annotations = annotations
# This dictionary associates canvas IDs with images and pages
# Filled by parse_manifest ; used by parse_annotation_list
......@@ -158,7 +158,7 @@ class ManifestsImporter(ABC):
stream.seek(0)
self.parse_manifest(stream)
break
elif value == 'sc:AnnotationList' and not self.no_annotations:
elif value == 'sc:AnnotationList' and not self.annotations:
stream.seek(0)
self.parse_annotation_list(stream)
break
......
......@@ -17,7 +17,7 @@ class Command(BaseCommand):
parser.add_argument(
'index_folder',
help='Folder to recursively search for index files',
default='.'
default='.',
)
parser.add_argument(
'--dry-run',
......@@ -28,13 +28,14 @@ class Command(BaseCommand):
parser.add_argument(
'--mask',
help="""A mask to identify images from the index file path. Cannot be used with --regex.
Syntax: "something<ID>something"
<ID> will be used as the image ID."""
Syntax: "something<PATH>something"
<PATH> will be used as the image path.""",
)
parser.add_argument(
'--regex',
help="""A regex to use as a mask for more complex cases.
Must have only one capturing group. Cannot be used with --mask."""
Must have only one capturing group. Cannot be used with --mask.""",
default=IndexImporter.DEFAULT_MASK,
)
def handle(self, *args, **options):
......@@ -48,14 +49,13 @@ class Command(BaseCommand):
if options['mask'] is not None and options['regex'] is not None:
raise CommandError('--mask and --regex cannot be used simultaneously.')
# If --regex is set, use it, else use the default mask.
mask_regex = options['regex'] or IndexImporter.DEFAULT_MASK
mask_regex = options['regex']
if options['mask'] is not None:
mask = options['mask']
assert mask.count('<ID>') == 1
# Replace <ID> with (.+) and escape the rest
mask_regex = '^' + r'(.+)'.join(re.escape(p) for p in mask.split('<ID>')) + '$'
assert mask.count('<PATH>') == 1
# Replace <PATH> with (.+) and escape the rest
mask_regex = '^' + r'(.+)'.join(re.escape(p) for p in mask.split('<PATH>')) + '$'
importer = IndexImporter(options['index_folder'], mask=mask_regex)
if options['dry_run']:
......
......@@ -56,5 +56,5 @@ class Command(BaseCommand):
servers,
options['manifest_folder'],
offline=options['offline'],
no_annotations=options['no_annotations'],
annotations=not options['no_annotations'],
).run()
......@@ -33,7 +33,7 @@ def import_indexes(image, page, index_path, extension='jpg'):
continue
index = REGEX_INDEX.match(line)
if index is None:
logger.warn('Index parsing failed : {}'.format(line))
logger.warning('Index parsing failed : {}'.format(line))
continue
# Build zone
......@@ -194,6 +194,6 @@ class IndexImporter(object):
except (Image.DoesNotExist, Image.MultipleObjectsReturned):
pass
if image is None:
print("{}\tFAIL".format(index_path))
logger.warning("{}\tFAIL".format(index_path))
else:
print("{}\t{}".format(index_path, image.path))
logger.info("{}\t{}".format(index_path, image.path))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment