Skip to content

Base Data Extraction class from SQLite export

Create new modules to support data extraction from an SQLite Arkindex export.

  • db.py to hold all your custom SQL queries and the code that executes them
  • data.py to implement a new DataGenerator class; a small spec follows
Module spec
class DataGenerator:
    """
    Extract line transcriptions and line images from an SQLite Arkindex export.

    This is a specification skeleton: connect_db, get_transcriptions,
    get_image and _save_line_image are stubs that still have to be implemented.
    """

    def __init__(self, db_path: Path, element_type: str) -> None:
        """
        Remember the element type to extract and open the export database.

        :param db_path: Path to the SQLite export.
        :param element_type: Type of the elements whose transcriptions are retrieved.
        """
        self.element_type = element_type
        self.db = self.connect_db(db_path)

    def connect_db(self, db_path: Path):
        """
        Open the database at db_path; the returned value is stored in self.db.

        Not implemented yet, so it currently returns None.
        """
        pass

    # The return annotation is quoted because Transcription is declared further
    # down in the module; an unquoted name would fail when this class is defined.
    def get_transcriptions(self, parent_id: UUID) -> List["Transcription"]:
        """
        Retrieve transcriptions under parent_id with type self.element_type
        """
        pass

    def get_image(self, image_url) -> np.ndarray:
        """
        Download image -> convert to ndarray
        """

    def _save_line_image(self, img, path, *args):
        """
        Save line image, could be a method of Transcription
        """

    def get_line_transcriptions(self, parent_id: UUID, output_path: Path) -> None:
        """
        The main loop

        Fetch the transcriptions found under parent_id, retrieve the image each
        one points to and run the line extraction on it.

        :param parent_id: Parent whose transcriptions are processed.
        :param output_path: Not used by this skeleton yet.
            NOTE(review): presumably the directory receiving the generated files - confirm.
        """
        # Forward the parent and keep the result: the loop below iterates over it.
        transcriptions = self.get_transcriptions(parent_id)
        if not transcriptions:
            # Nothing to extract (this also covers the unimplemented stub, which returns None).
            return

        # Retrieve page image, downloading every distinct URL only once.
        # NOTE(review): the spec does not say where the URL comes from;
        # Transcription.image_url is the only URL it declares - confirm.
        page_images = {}

        for transcription in transcriptions:
            image_url = transcription.image_url
            if image_url not in page_images:
                page_images[image_url] = self.get_image(image_url)

            # Could be a method of Transcription
            # TODO: the spec leaves the extract() arguments open; the page image
            # to crop from is page_images[image_url].
            line_image_extractor.extractor.extract(...)

            # Save image
  

@dataclass
class Transcription:
    """
    One transcription record, the item type returned by DataGenerator.get_transcriptions.
    """
    # Transcribed text.
    text: str
    # NOTE(review): presumably the file the text label is written to - confirm.
    label_path: Path
    # NOTE(review): presumably the worker version that produced the transcription - confirm.
    worker_version_id: UUID
    # NOTE(review): presumably the element the transcription is attached to - confirm.
    element_id: UUID
    # URL of the image associated with this transcription.
    image_url: str
    # NOTE(review): presumably the file the extracted line image is saved to - confirm.
    image_path: Path