# reporting.py
# -*- coding: utf-8 -*-
import json
import traceback
import warnings
from collections import Counter
from datetime import datetime

from apistar.exceptions import ErrorResponse

from arkindex_worker import logger


class Reporter(object):
    """
    Accumulate a per-element activity report and dump it as JSON.

    All the data lives in ``self.report_data``, a plain dict holding the
    ``slug``, a ``version``, the ``started`` timestamp and an ``elements``
    mapping from stringified element IDs to their individual reports
    (see ``_get_element`` for the layout of a single element report).
    """

    def __init__(self, name):
        """
        Start a new report.

        :param name: Stored as the ``slug`` of the report.
        """
        # TODO: use real data from workers
        self.report_data = {
            "slug": name,
            "version": "0.0",
            # NOTE: datetime.utcnow() is deprecated since Python 3.12; it is
            # kept here so the naive ISO-8601 timestamp format stays unchanged.
            "started": datetime.utcnow().isoformat(),
            "elements": {},
        }
        logger.info(f"Starting ML report for {name}")

    def __repr__(self):
        return f"{self.__class__.__name__}({self.report_data['slug']})"

    def _get_element(self, element_id):
        """
        Return the report dict of an element, creating it on first access.

        :param element_id: ID of the element; it is converted to ``str``
           before being used as a key.
        :returns: The (mutable) report dict stored for this element.
        """
        return self.report_data["elements"].setdefault(
            str(element_id),
            {
                "started": datetime.utcnow().isoformat(),
                # Created element counts, by type slug
                "elements": {},
                # Created transcriptions count
                "transcriptions": 0,
                # Created classification counts, by class
                "classifications": {},
                # Created entities ({"id": "", "type": "", "name": ""}) from this element
                "entities": [],
                # Created metadata ({"id": "", "type": "", "name": ""}) from this element
                "metadata": [],
                "errors": [],
            },
        )

    def process(self, element_id):
        """
        Report that a specific element ID is being processed.
        """
        # Just call the element initializer
        self._get_element(element_id)

    def add_element(self, parent_id, type, type_count=1):
        """
        Report creating a single element with a parent.
        Multiple elements with the same type and parent can be declared with the type_count parameter.

        :param parent_id: ID of the parent element the count is attached to.
        :param type: Type slug used as the key of the counter.
        :param type_count: Number of created elements to add to the counter.
        """
        elements = self._get_element(parent_id)["elements"]
        elements[type] = elements.get(type, 0) + type_count

    def add_classification(self, element_id, class_name):
        """
        Report creating a classification on an element.

        :param element_id: ID of the classified element.
        :param class_name: Class whose counter is incremented by one.
        """
        classifications = self._get_element(element_id)["classifications"]
        classifications[class_name] = classifications.get(class_name, 0) + 1

    def add_classifications(self, element_id, classifications):
        """
        Report one or more classifications at once.

        :param element_id: ID of the classified element.
        :param classifications: List of dicts, each with a ``class_name`` key.
        :raises AssertionError: When ``classifications`` is not a list.
        """
        # Kept as an assert so that callers relying on AssertionError still work
        assert isinstance(
            classifications, list
        ), "A list is required for classifications"
        element = self._get_element(element_id)
        # Retrieve the previous existing classification counts, if any.
        # The mapping is passed positionally: unpacking it as keyword arguments
        # would raise a TypeError for any class name that is not a string.
        counter = Counter(element["classifications"])
        # Add the new ones
        counter.update(
            classification["class_name"] for classification in classifications
        )
        element["classifications"] = dict(counter)

    def add_transcription(self, element_id, type=None, type_count=None):
        """
        Report creating a transcription on an element.
        Multiple transcriptions with the same parent can be declared with the type_count parameter.

        :param element_id: ID of the element holding the transcriptions.
        :param type: Deprecated transcription type. When it is an integer and
           ``type_count`` is not set, it is interpreted as the count instead.
        :param type_count: Number of transcriptions to add; defaults to 1.
        """
        if type_count is None:
            if isinstance(type, int):
                # Supports the add_transcription(element_id, count) call style
                type_count, type = type, None
            else:
                type_count = 1

        if type is not None:
            warnings.warn(
                "Transcription types have been deprecated and will be removed in the next release.",
                FutureWarning,
            )

        self._get_element(element_id)["transcriptions"] += type_count

    def add_transcriptions(self, element_id, transcriptions):
        """
        Report one or more transcriptions at once.

        Deprecated: always emits a FutureWarning, then only the length of the
        list is reported through ``add_transcription``.

        :raises AssertionError: When ``transcriptions`` is not a list.
        """
        assert isinstance(transcriptions, list), "A list is required for transcriptions"
        warnings.warn(
            "Reporter.add_transcriptions is deprecated due to transcription types being removed. Please use Reporter.add_transcription(element_id, count) instead.",
            FutureWarning,
        )
        self.add_transcription(element_id, len(transcriptions))

    def add_entity(self, element_id, entity_id, type, name):
        """
        Report creating an entity from an element.

        Appends ``{"id": entity_id, "type": type, "name": name}`` to the
        ``entities`` list of the element report.
        """
        entities = self._get_element(element_id)["entities"]
        entities.append({"id": entity_id, "type": type, "name": name})

    def add_entity_link(self, *args, **kwargs):
        """
        Not implemented: always raises NotImplementedError.
        """
        raise NotImplementedError

    def add_entity_role(self, *args, **kwargs):
        """
        Not implemented: always raises NotImplementedError.
        """
        raise NotImplementedError

    def add_metadata(self, element_id, metadata_id, type, name):
        """
        Report creating a metadata from an element.

        Appends ``{"id": metadata_id, "type": type, "name": name}`` to the
        ``metadata`` list of the element report.
        """
        metadata = self._get_element(element_id)["metadata"]
        metadata.append({"id": metadata_id, "type": type, "name": name})

    def error(self, element_id, exception):
        """
        Report an exception that occurred on an element.

        The stored entry holds the exception class name and its message, plus
        the formatted traceback when the exception carries one. For an
        ``ErrorResponse``, the message is replaced by the response title and
        the status code and content are stored as well.

        :param element_id: ID of the element the error is attached to.
        :param exception: The exception instance to report.
        """
        error_data = {
            "class": exception.__class__.__name__,
            "message": str(exception),
        }
        if exception.__traceback__ is not None:
            error_data["traceback"] = "\n".join(
                traceback.format_tb(exception.__traceback__)
            )

        if isinstance(exception, ErrorResponse):
            error_data["message"] = exception.title
            error_data["status_code"] = exception.status_code
            error_data["content"] = exception.content

        self._get_element(element_id)["errors"].append(error_data)

    def save(self, path):
        """
        Write the whole report as JSON to ``path``, overwriting any existing file.
        """
        logger.info(f"Saving ML report to {path}")
        # Explicit encoding so the output does not depend on the locale
        with open(path, "w", encoding="utf-8") as f:
            json.dump(self.report_data, f)