Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
A
api-client
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Arkindex
api-client
Commits
7b38b7e2
Commit
7b38b7e2
authored
5 years ago
by
kermorvant
Browse files
Options
Downloads
Patches
Plain Diff
example script to upload files to arkindex through S3
parent
08da09ac
No related branches found
No related tags found
1 merge request
!46
example script to upload files to arkindex through S3
Pipeline
#28215
failed
5 years ago
Stage: test
Stage: deploy
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
commands/import_local_files.py
+163
-0
163 additions, 0 deletions
commands/import_local_files.py
with
163 additions
and
0 deletions
commands/import_local_files.py
0 → 100644
+
163
−
0
View file @
7b38b7e2
#!/usr/bin/env python3
"""
Import local images in a directory to Arkindex.
"""
from
apistar.exceptions
import
ErrorResponse
from
arkindex
import
ArkindexClient
,
options_from_env
import
argparse
import
glob
import
hashlib
import
imghdr
import
logging
import
os
import
requests
import
uuid
# Image types accepted for import; compared against the value returned by
# imghdr.what() when scanning the local directory.
SUPPORTED_IMG = ['jpeg', 'png']

# Log every message at INFO level or above, prefixed by its level name.
logging.basicConfig(
    format='[%(levelname)s] %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Module-wide API client shared by all classes below; it is created
# unconfigured here and configured in main() once arguments are parsed.
ark_client = ArkindexClient()
class LocalPage(object):
    """
    A local image to be uploaded and attached to a volume as a page.

    The image file is opened when the instance is created and is closed
    when `run` finishes, or when `close` is called explicitly.
    """

    def __init__(self, page_path, corpus_id, volume_id, index):
        """
        Init a local image parameters.

        :param page_path: Path to the local image file.
        :param corpus_id: Value sent as `corpus` when creating the page.
        :param volume_id: Value sent as `parent` when creating the page.
        :param index: Page position, sent as the `folio` metadata.
        """
        self.page_path = page_path
        self.page_name = os.path.basename(self.page_path)
        self.page_file = open(self.page_path, 'rb')
        self.corpus_id = corpus_id
        self.volume_id = volume_id
        self.index = index
        logger.info('import {}'.format(self.page_path))

    def close(self):
        """
        Close the underlying image file. Safe to call more than once.
        """
        self.page_file.close()

    def hash_image(self):
        """
        Create the hash for image validations.

        :returns: Hexadecimal MD5 digest of the whole image file.
        """
        # Always hash from the start so the result does not depend on
        # whatever was read from the file before.
        self.page_file.seek(0)
        md5 = hashlib.md5()
        for chunk in iter(lambda: self.page_file.read(4096), b""):
            md5.update(chunk)
        return md5.hexdigest()

    def upload_image(self):
        """
        Upload the image on S3 and validate it.

        When the API answers 400 with an `id`, the image referenced by that
        ID is retrieved and reused instead of being uploaded again.

        :raises ErrorResponse: For any other API error.
        :raises requests.RequestException: When the S3 upload fails.
        """
        logger.info('Creating image for page {}'.format(self.page_path))
        try:
            self.image = ark_client.request('CreateImage', body={'hash': self.hash_image()})
        except ErrorResponse as e:
            if e.status_code == 400 and 'id' in e.content:
                self.image = ark_client.request('RetrieveImage', id=e.content['id'])
            else:
                # Do not swallow unrelated errors: self.image would be left
                # unset and create_page would fail with an AttributeError.
                raise
        else:
            # NOTE(review): upload and validation are both assumed to apply
            # only to freshly created images - confirm against the API.
            logger.info('Uploading image from page {} as {}'.format(self.page_path, self.image['id']))
            self.page_file.seek(0)
            response = requests.put(self.image['s3_put_url'], data=self.page_file)
            # Stop here on a failed upload instead of validating a missing file
            response.raise_for_status()
            logger.info('Validating image {}'.format(self.image['id']))
            ark_client.request('PartialUpdateImage', id=self.image['id'], body={'status': 'checked'})

    def create_page(self):
        """
        Create a page in Arkindex with S3 images.

        Requires `upload_image` to have set `self.image` beforehand.
        """
        logger.info('Creating page {}'.format(self.page_path))
        self.page = ark_client.request('CreateElement', body={
            'corpus': self.corpus_id,
            'type': 'page',
            'parent': self.volume_id,
            'name': self.page_name,
            'image': self.image['id'],
            'metadata': {
                'folio': str(self.index),
            },
        })

    def run(self):
        """
        Run the import of a local page.

        Failures are logged and not propagated, so that one bad page does
        not abort the import of the remaining ones. The image file is
        always closed afterwards.
        """
        try:
            self.upload_image()
            self.create_page()
        except AssertionError as e:
            logger.error('Failed importing page {}: {}'.format(self.page_path, e))
        except ErrorResponse as e:
            logger.error('Failed importing page {}: {} - {}'.format(self.page_path, e.status_code, e.content))
        except requests.RequestException as e:
            logger.error('Failed importing page {}: {}'.format(self.page_path, e))
        finally:
            self.close()
class LocalVolume():
    """
    Volume for a local directory.
    """

    def __init__(self, directory=None, corpus=None, volume_name=None):
        """
        Init a volume with a local directory.

        :param directory: Path to the local directory holding the images.
        :param corpus: Corpus identifier; stored as a string.
        :param volume_name: Name given to the created volume.
        """
        self.local_dir = directory
        self.volume_name = volume_name
        self.corpus_id = str(corpus)

    def create_volume(self):
        """
        Create the corresponding volume in Arkindex.
        """
        logger.info('Creating volume {}'.format(self.volume_name))
        self.volume = ark_client.request('CreateElement', body={
            'corpus': self.corpus_id,
            'type': 'volume',
            'name': self.volume_name,
        })

    def _list_entries(self):
        """
        List the entries of the local directory, sorted by path.

        glob returns entries in arbitrary order; sorting makes the page
        indices assigned by `run` deterministic.
        """
        return sorted(glob.glob(os.path.join(self.local_dir, '*')))

    def run(self):
        """
        Run the local directory import.

        Creates the volume, then imports every supported image of the
        directory as a page, numbering pages from 1 in sorted path order.
        API errors are logged and not propagated.
        """
        try:
            self.create_volume()
            idx = 1
            for page_path in self._list_entries():
                # Check that the entry is a regular file holding an image;
                # imghdr.what would fail when given a directory.
                # NOTE: imghdr is removed from the standard library in Python 3.13.
                if os.path.isfile(page_path) and imghdr.what(page_path) in SUPPORTED_IMG:
                    LocalPage(
                        page_path,
                        corpus_id=self.corpus_id,
                        volume_id=self.volume['id'],
                        index=idx,
                    ).run()
                    idx += 1
                else:
                    logger.info('Skip non image file {}'.format(page_path))
        except AssertionError as e:
            logger.error('Failed importing volume {}: {}'.format(self.volume_name, e))
        except ErrorResponse as e:
            logger.error('Failed importing volume {}: {} - {}'.format(self.volume_name, e.status_code, e.content))
def main():
    """
    Parse the command line, configure the API client and import the volume.
    """
    cli = argparse.ArgumentParser(description='Import local files from directory')
    cli.add_argument('directory', help='path to local directory to import')
    cli.add_argument('--volume-name', required=True, help='name of the volume to be created')
    cli.add_argument(
        '--corpus',
        required=True,
        type=uuid.UUID,
        help='UUID of an existing corpus to import into',
    )
    cli.add_argument(
        '--sleep',
        default=0,
        type=float,
        help='Throttle API requests by waiting for a given number of seconds',
    )

    options = vars(cli.parse_args())

    # The throttle delay is a client setting, not a volume parameter:
    # take it out before the rest is forwarded to LocalVolume.
    delay = options.pop('sleep')
    ark_client.configure(sleep=delay, **options_from_env())

    volume = LocalVolume(**options)
    volume.run()
# Run the import only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment