Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
B
Base Worker
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Workers
Base Worker
Commits
39d57818
Commit
39d57818
authored
4 years ago
by
Eva Bardou
Browse files
Options
Downloads
Patches
Plain Diff
Add tests
parent
00a573f6
No related branches found
No related tags found
No related merge requests found
Pipeline
#78347
passed
4 years ago
Stage: test
Stage: build
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
arkindex_worker/worker.py
+3
-2
3 additions, 2 deletions
arkindex_worker/worker.py
tests/conftest.py
+49
-1
49 additions, 1 deletion
tests/conftest.py
tests/test_base_worker.py
+295
-0
295 additions, 0 deletions
tests/test_base_worker.py
with
347 additions
and
3 deletions
arkindex_worker/worker.py
+
3
−
2
View file @
39d57818
...
...
@@ -22,6 +22,7 @@ from arkindex_worker.reporting import Reporter
from
arkindex_worker.utils
import
convert_str_uuid_to_hex
MANUAL_SLUG
=
"
manual
"
DATA_DIR
=
"
/data
"
CACHE_DIR
=
f
"
/data/
{
os
.
environ
.
get
(
'
TASK_ID
'
)
}
"
...
...
@@ -137,7 +138,7 @@ class BaseWorker(object):
parents_cache_paths
=
[]
for
parent
in
task
[
"
parents
"
]:
parent_cache_path
=
f
"
/data
/
{
parent
}
/db.sqlite
"
parent_cache_path
=
f
"
{
DATA_DIR
}
/
{
parent
}
/db.sqlite
"
if
os
.
path
.
isfile
(
parent_cache_path
):
parents_cache_paths
.
append
(
parent_cache_path
)
...
...
@@ -150,7 +151,7 @@ class BaseWorker(object):
cache_file
.
write
(
parent_cache_file
.
read
())
# Many parents caches, we have to merge all of them in our current task local cache
elif
len
(
parents_cache_paths
)
>
1
:
self
.
cache
.
merge_parent_caches
(
parents_cache_paths
)
self
.
cache
.
merge_parent
s
_caches
(
parents_cache_paths
)
def
load_secret
(
self
,
name
):
"""
Load all secrets described in the worker configuration
"""
...
...
This diff is collapsed.
Click to expand it.
tests/conftest.py
+
49
−
1
View file @
39d57818
...
...
@@ -9,8 +9,9 @@ import pytest
import
yaml
from
arkindex.mock
import
MockApiClient
from
arkindex_worker.cache
import
LocalDB
from
arkindex_worker.git
import
GitHelper
,
GitlabHelper
from
arkindex_worker.worker
import
ElementsWorker
from
arkindex_worker.worker
import
BaseWorker
,
ElementsWorker
FIXTURES_DIR
=
Path
(
__file__
).
resolve
().
parent
/
"
data
"
CACHE_DIR
=
str
(
Path
(
__file__
).
resolve
().
parent
/
"
data/cache
"
)
...
...
@@ -92,6 +93,42 @@ def handle_cache_file(monkeypatch):
os
.
remove
(
CACHE_FILE
)
@pytest.fixture
def first_parent_folder():
    """Create the first parent's cache folder, then remove it after the test."""
    folder = f"{CACHE_DIR}/first_parent_id"
    os.mkdir(folder)

    yield

    # Teardown: nothing to do when the folder has already disappeared
    if not os.path.isdir(folder):
        return
    os.rmdir(folder)
@pytest.fixture
def second_parent_folder():
    """Create the second parent's cache folder, then remove it after the test."""
    folder = f"{CACHE_DIR}/second_parent_id"
    os.mkdir(folder)

    yield

    # Teardown: nothing to do when the folder has already disappeared
    if not os.path.isdir(folder):
        return
    os.rmdir(folder)
@pytest.fixture
def first_parent_cache(first_parent_folder):
    """Create the first parent's SQLite cache with its tables, deleting the file afterwards."""
    database = LocalDB(f"{CACHE_DIR}/first_parent_id/db.sqlite")
    database.create_tables()

    yield

    # Teardown: delete the database file so the folder fixture can remove its directory
    db_file = database.path
    if not os.path.isfile(db_file):
        return
    os.remove(db_file)
@pytest.fixture
def second_parent_cache(second_parent_folder):
    """Create the second parent's SQLite cache with its tables, deleting the file afterwards."""
    database = LocalDB(f"{CACHE_DIR}/second_parent_id/db.sqlite")
    database.create_tables()

    yield

    # Teardown: delete the database file so the folder fixture can remove its directory
    db_file = database.path
    if not os.path.isfile(db_file):
        return
    os.remove(db_file)
@pytest.fixture
(
autouse
=
True
)
def
give_worker_version_id_env_variable
(
monkeypatch
):
monkeypatch
.
setenv
(
"
WORKER_VERSION_ID
"
,
"
12341234-1234-1234-1234-123412341234
"
)
...
...
@@ -164,6 +201,17 @@ def mock_elements_worker(monkeypatch, mock_worker_version_api):
return
worker
@pytest.fixture
def mock_base_worker_with_cache(mocker, monkeypatch, mock_worker_version_api):
    """
    Build a BaseWorker using SQLite cache

    The worker is created with ``use_cache=True`` and a fixed ``sys.argv`` so
    that pytest's own command-line arguments are not seen by the worker.
    ``TASK_ID`` is set to ``my_task`` and ``arkindex_worker.worker.DATA_DIR``
    is patched to the tests' ``CACHE_DIR``.
    """
    # Replace the process arguments with a bare program name
    monkeypatch.setattr(sys, "argv", ["worker"])

    # NOTE(review): the worker is instantiated BEFORE TASK_ID is set and
    # DATA_DIR is patched; presumably only configure() reads them - confirm
    # before reordering these statements.
    worker = BaseWorker(use_cache=True)
    monkeypatch.setenv("TASK_ID", "my_task")
    # Point the worker module's data directory at the tests' cache directory
    mocker.patch("arkindex_worker.worker.DATA_DIR", CACHE_DIR)
    return worker
@pytest.fixture
def
mock_elements_worker_with_cache
(
monkeypatch
,
mock_worker_version_api
):
"""
Build and configure an ElementsWorker using SQLite cache with fixed CLI parameters to avoid issues with pytest
"""
...
...
This diff is collapsed.
Click to expand it.
tests/test_base_worker.py
+
295
−
0
View file @
39d57818
# -*- coding: utf-8 -*-
import
json
import
logging
import
os
import
sys
...
...
@@ -9,8 +10,42 @@ import pytest
from
arkindex.mock
import
MockApiClient
from
arkindex_worker
import
logger
from
arkindex_worker.cache
import
CachedElement
,
CachedTranscription
,
LocalDB
from
arkindex_worker.utils
import
convert_str_uuid_to_hex
from
arkindex_worker.worker
import
BaseWorker
# Directory "data/cache" located next to this test module
CACHE_DIR = str(Path(__file__).resolve().parent / "data/cache")
# SQLite database paths of the two parent caches used by the merging tests
FIRST_PARENT_CACHE = f"{CACHE_DIR}/first_parent_id/db.sqlite"
SECOND_PARENT_CACHE = f"{CACHE_DIR}/second_parent_id/db.sqlite"
# Two cached elements that differ only by their id
FIRST_ELEM_TO_INSERT = CachedElement(
    id=convert_str_uuid_to_hex("11111111-1111-1111-1111-111111111111"),
    parent_id=convert_str_uuid_to_hex("12341234-1234-1234-1234-123412341234"),
    type="something",
    polygon=json.dumps([[1, 1], [2, 2], [2, 1], [1, 2]]),
    worker_version_id=convert_str_uuid_to_hex("56785678-5678-5678-5678-567856785678"),
)
SECOND_ELEM_TO_INSERT = CachedElement(
    id=convert_str_uuid_to_hex("22222222-2222-2222-2222-222222222222"),
    parent_id=convert_str_uuid_to_hex("12341234-1234-1234-1234-123412341234"),
    type="something",
    polygon=json.dumps([[1, 1], [2, 2], [2, 1], [1, 2]]),
    worker_version_id=convert_str_uuid_to_hex("56785678-5678-5678-5678-567856785678"),
)
# Two cached transcriptions; each one's element_id matches the id of the
# element constant with the same ordinal above
FIRST_TR_TO_INSERT = CachedTranscription(
    id=convert_str_uuid_to_hex("11111111-1111-1111-1111-111111111111"),
    element_id=convert_str_uuid_to_hex("11111111-1111-1111-1111-111111111111"),
    text="Hello!",
    confidence=0.42,
    worker_version_id=convert_str_uuid_to_hex("56785678-5678-5678-5678-567856785678"),
)
SECOND_TR_TO_INSERT = CachedTranscription(
    id=convert_str_uuid_to_hex("22222222-2222-2222-2222-222222222222"),
    element_id=convert_str_uuid_to_hex("22222222-2222-2222-2222-222222222222"),
    text="How are you?",
    confidence=0.42,
    worker_version_id=convert_str_uuid_to_hex("56785678-5678-5678-5678-567856785678"),
)
def
test_init_default_local_share
(
monkeypatch
):
worker
=
BaseWorker
()
...
...
@@ -115,6 +150,266 @@ def test_cli_arg_verbose_given(mocker, mock_worker_version_api, mock_user_api):
logger
.
setLevel
(
logging
.
NOTSET
)
def test_configure_cache_merging_no_parent(responses, mock_base_worker_with_cache):
    """Configuring a task without any parent must leave the cache file untouched."""
    responses.add(
        responses.GET,
        "http://testserver/ponos/v1/task/my_task/from-agent/",
        status=200,
        json={"parents": []},
    )

    cache_path = mock_base_worker_with_cache.cache.path

    def read_cache():
        # Raw bytes of the worker's cache file at the time of the call
        with open(cache_path, "rb") as cache_file:
            return cache_file.read()

    before = read_cache()
    mock_base_worker_with_cache.configure()
    after = read_cache()

    assert before == after, "Cache was modified"

    expected_urls = [
        "http://testserver/api/v1/user/",
        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
        "http://testserver/ponos/v1/task/my_task/from-agent/",
    ]
    assert len(responses.calls) == 3
    requested_urls = [call.request.url for call in responses.calls]
    assert requested_urls == expected_urls
def test_configure_cache_merging_one_parent_without_file(
    responses, mock_base_worker_with_cache, first_parent_folder
):
    """A parent folder without a database file must leave the cache file untouched."""
    responses.add(
        responses.GET,
        "http://testserver/ponos/v1/task/my_task/from-agent/",
        status=200,
        json={"parents": ["first_parent_id"]},
    )

    cache_path = mock_base_worker_with_cache.cache.path

    def read_cache():
        # Raw bytes of the worker's cache file at the time of the call
        with open(cache_path, "rb") as cache_file:
            return cache_file.read()

    before = read_cache()
    mock_base_worker_with_cache.configure()
    after = read_cache()

    assert before == after, "Cache was modified"

    expected_urls = [
        "http://testserver/api/v1/user/",
        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
        "http://testserver/ponos/v1/task/my_task/from-agent/",
    ]
    assert len(responses.calls) == 3
    requested_urls = [call.request.url for call in responses.calls]
    assert requested_urls == expected_urls
def test_configure_cache_merging_one_parent(
    responses, mock_base_worker_with_cache, first_parent_cache
):
    """With a single parent cache, the worker cache must hold exactly the parent's rows."""
    parent_db = LocalDB(FIRST_PARENT_CACHE)
    parent_db.insert("elements", [FIRST_ELEM_TO_INSERT])
    parent_db.insert("transcriptions", [FIRST_TR_TO_INSERT])

    responses.add(
        responses.GET,
        "http://testserver/ponos/v1/task/my_task/from-agent/",
        status=200,
        json={"parents": ["first_parent_id"]},
    )

    mock_base_worker_with_cache.configure()

    # (query, row model, expected objects) for each table to verify
    checks = (
        ("SELECT * FROM elements", CachedElement, [FIRST_ELEM_TO_INSERT]),
        ("SELECT * FROM transcriptions", CachedTranscription, [FIRST_TR_TO_INSERT]),
    )
    for query, model, expected in checks:
        worker_rows = mock_base_worker_with_cache.cache.cursor.execute(query).fetchall()
        parent_rows = parent_db.cursor.execute(query).fetchall()
        assert worker_rows == parent_rows
        assert [model(**dict(row)) for row in worker_rows] == expected

    expected_urls = [
        "http://testserver/api/v1/user/",
        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
        "http://testserver/ponos/v1/task/my_task/from-agent/",
    ]
    assert len(responses.calls) == 3
    requested_urls = [call.request.url for call in responses.calls]
    assert requested_urls == expected_urls
def test_configure_cache_merging_multiple_parents_one_file(
    responses, mock_base_worker_with_cache, first_parent_cache, second_parent_folder
):
    """With two parents but only one database file, only that file's rows are expected."""
    parent_db = LocalDB(FIRST_PARENT_CACHE)
    parent_db.insert("elements", [FIRST_ELEM_TO_INSERT])
    parent_db.insert("transcriptions", [FIRST_TR_TO_INSERT])

    responses.add(
        responses.GET,
        "http://testserver/ponos/v1/task/my_task/from-agent/",
        status=200,
        json={"parents": ["first_parent_id", "second_parent_id"]},
    )

    mock_base_worker_with_cache.configure()

    # (query, row model, expected objects) for each table to verify
    checks = (
        ("SELECT * FROM elements", CachedElement, [FIRST_ELEM_TO_INSERT]),
        ("SELECT * FROM transcriptions", CachedTranscription, [FIRST_TR_TO_INSERT]),
    )
    for query, model, expected in checks:
        worker_rows = mock_base_worker_with_cache.cache.cursor.execute(query).fetchall()
        parent_rows = parent_db.cursor.execute(query).fetchall()
        assert worker_rows == parent_rows
        assert [model(**dict(row)) for row in worker_rows] == expected

    expected_urls = [
        "http://testserver/api/v1/user/",
        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
        "http://testserver/ponos/v1/task/my_task/from-agent/",
    ]
    assert len(responses.calls) == 3
    requested_urls = [call.request.url for call in responses.calls]
    assert requested_urls == expected_urls
def test_configure_cache_merging_multiple_parents_differing_lines(
    responses, mock_base_worker_with_cache, first_parent_cache, second_parent_cache
):
    """
    Two parent caches holding different rows must both end up in the worker cache.

    The worker cache is expected to contain the first parent's rows followed by
    the second parent's rows, for both elements and transcriptions.
    """
    # Inserting differing lines in both parents caches
    # (each parent database is opened once; the fixtures only create the files,
    # so dedicated local names avoid rebinding the fixture arguments)
    first_db = LocalDB(FIRST_PARENT_CACHE)
    first_db.insert("elements", [FIRST_ELEM_TO_INSERT])
    first_db.insert("transcriptions", [FIRST_TR_TO_INSERT])
    second_db = LocalDB(SECOND_PARENT_CACHE)
    second_db.insert("elements", [SECOND_ELEM_TO_INSERT])
    second_db.insert("transcriptions", [SECOND_TR_TO_INSERT])

    responses.add(
        responses.GET,
        "http://testserver/ponos/v1/task/my_task/from-agent/",
        status=200,
        json={"parents": ["first_parent_id", "second_parent_id"]},
    )

    mock_base_worker_with_cache.configure()

    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
        "SELECT * FROM elements"
    ).fetchall()
    assert (
        stored_rows
        == first_db.cursor.execute("SELECT * FROM elements").fetchall()
        + second_db.cursor.execute("SELECT * FROM elements").fetchall()
    )
    assert [CachedElement(**dict(row)) for row in stored_rows] == [
        FIRST_ELEM_TO_INSERT,
        SECOND_ELEM_TO_INSERT,
    ]

    stored_rows = mock_base_worker_with_cache.cache.cursor.execute(
        "SELECT * FROM transcriptions"
    ).fetchall()
    assert (
        stored_rows
        == first_db.cursor.execute("SELECT * FROM transcriptions").fetchall()
        + second_db.cursor.execute("SELECT * FROM transcriptions").fetchall()
    )
    assert [CachedTranscription(**dict(row)) for row in stored_rows] == [
        FIRST_TR_TO_INSERT,
        SECOND_TR_TO_INSERT,
    ]

    # Only the user, worker version and task endpoints should have been called
    assert len(responses.calls) == 3
    assert [call.request.url for call in responses.calls] == [
        "http://testserver/api/v1/user/",
        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
        "http://testserver/ponos/v1/task/my_task/from-agent/",
    ]
def test_configure_cache_merging_multiple_parents_identical_lines(
    responses, mock_base_worker_with_cache, first_parent_cache, second_parent_cache
):
    """Two parent caches holding the same rows must produce those rows only once."""
    # Inserting identical lines in both parents caches
    parent_dbs = []
    for db_path in (FIRST_PARENT_CACHE, SECOND_PARENT_CACHE):
        parent_db = LocalDB(db_path)
        parent_db.insert("elements", [FIRST_ELEM_TO_INSERT, SECOND_ELEM_TO_INSERT])
        parent_db.insert("transcriptions", [FIRST_TR_TO_INSERT, SECOND_TR_TO_INSERT])
        parent_dbs.append(parent_db)

    responses.add(
        responses.GET,
        "http://testserver/ponos/v1/task/my_task/from-agent/",
        status=200,
        json={"parents": ["first_parent_id", "second_parent_id"]},
    )

    mock_base_worker_with_cache.configure()

    # (query, row model, expected objects) for each table to verify
    checks = (
        (
            "SELECT * FROM elements",
            CachedElement,
            [FIRST_ELEM_TO_INSERT, SECOND_ELEM_TO_INSERT],
        ),
        (
            "SELECT * FROM transcriptions",
            CachedTranscription,
            [FIRST_TR_TO_INSERT, SECOND_TR_TO_INSERT],
        ),
    )
    for query, model, expected in checks:
        worker_rows = mock_base_worker_with_cache.cache.cursor.execute(query).fetchall()
        # The merged rows must match each parent's content, first parent first
        for parent_db in parent_dbs:
            assert worker_rows == parent_db.cursor.execute(query).fetchall()
        assert [model(**dict(row)) for row in worker_rows] == expected

    expected_urls = [
        "http://testserver/api/v1/user/",
        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
        "http://testserver/ponos/v1/task/my_task/from-agent/",
    ]
    assert len(responses.calls) == 3
    requested_urls = [call.request.url for call in responses.calls]
    assert requested_urls == expected_urls
def
test_load_missing_secret
():
worker
=
BaseWorker
()
worker
.
api_client
=
MockApiClient
()
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment