Base Data Extraction class from SQLite export
Create new modules to support data extraction from an SQLite Arkindex export.
- `db.py` to store all your custom SQL queries and their execution
- `data.py` to implement a new `DataGenerator` class; here is a small spec
Module spec
class DataGenerator:
    """Skeleton of the data extraction class working on an SQLite export.

    The database and image helpers are placeholders from the spec: they have
    no implementation yet and currently return ``None``.
    """

    def __init__(self, db_path: Path, element_type: str) -> None:
        """
        Store the element type and open the database.

        :param db_path: Path handed to ``connect_db``.
        :param element_type: Type of the elements whose transcriptions are
            retrieved.
        """
        self.element_type = element_type
        self.db = self.connect_db(db_path)

    def connect_db(self, db_path: Path):
        """
        Connect to the export located at db_path (not implemented yet)
        """
        pass

    # The return annotation is a quoted forward reference because
    # Transcription is declared after this class.
    def get_transcriptions(self, parent_id: UUID) -> List["Transcription"]:
        """
        Retrieve transcriptions under parent_id with type self.element_type
        """
        pass

    def get_image(self, image_url) -> np.ndarray:
        """
        Download image -> convert to ndarray
        """

    def _save_line_image(self, img, path, *args):
        """
        Save line image, could be a method of Transcription
        """

    def get_line_transcriptions(self, parent_id: UUID, output_path: Path):
        """
        The main loop

        Retrieve the transcriptions found under parent_id, fetch the image
        each of them points to and run the line extraction on it.

        :param parent_id: Forwarded to ``get_transcriptions``.
        :param output_path: Not used by the skeleton yet.
            NOTE(review): presumably the folder receiving the extracted
            lines - confirm.
        """
        transcriptions = self.get_transcriptions(parent_id)

        # Retrieve page image: each distinct URL is downloaded only once, so
        # transcriptions sharing an image do not trigger repeated downloads.
        page_images = {}
        for transcription in transcriptions:
            image_url = transcription.image_url
            if image_url not in page_images:
                page_images[image_url] = self.get_image(image_url)
            # TODO: hand page_image to the extractor once its arguments are
            # specified.
            page_image = page_images[image_url]

            # Could be a method of Transcription
            # NOTE(review): "..." is the spec's placeholder for the real
            # arguments of the extractor.
            line_image_extractor.extractor.extract(...)
            # Save image
@dataclass
class Transcription:
    """One transcription record, as returned by DataGenerator.get_transcriptions."""

    # Transcribed text.
    text: str
    # NOTE(review): presumably the file the text label is written to - confirm.
    label_path: Path
    # Identifier of the worker version attached to the transcription.
    worker_version_id: UUID
    # Identifier of the element the transcription belongs to.
    element_id: UUID
    # URL of the image, in the form accepted by DataGenerator.get_image.
    image_url: str
    # NOTE(review): presumably the file the line image is saved to - confirm.
    image_path: Path