Commit 952b816a authored by Nolan, committed by Yoann Schneider
Model selector on HuggingFace demo

parent 7ebf0564
1 merge request: !92 Model selector on HuggingFace demo
Pipeline #72510 passed
@@ -305,24 +305,32 @@ pip install gradio
The code was last tested with `gradio==3.18.0`.
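If you want to reproduce that tested environment, you can pin the version when installing (the version number is the one stated above):

```shell
$ pip install gradio==3.18.0
```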
![image](https://gitlab.com/teklia/dla/doc-ufcn/-/raw/main/hugging_face/resource/hf_doc_ufcn_line_hist.png)
![image](https://gitlab.com/teklia/dla/doc-ufcn/-/raw/main/hugging_face/resource/demo_doc_ufcn_hf.png)
An example is available in the `huggingface/` folder. You need to create a JSON configuration file with the following parameters:
An example is available in the `hugging_face/` folder. You need to create a YAML configuration file with the following parameters:
| Parameter | Description | Default value |
| ---------------- | ------------------------------------------------------------------------------------------------------------------ | ----------------------------- |
| `model_name` | Name of the model |`doc-ufcn-generic-historical-line`|
| `classes_colors` | List with the colors of the classes | `["green"]`|
| `title` | Title of the app (supports Markdown) | **Required**|
| `description` | Description of the app (supports Markdown) | **Required**|
| `examples` | Paths towards the image examples. | **Required**|
| `examples` | Paths towards the image examples | **Required**|
| `models` | List of models | **Required**|
A ready-to-use configuration file is available in `huggingface/config.json`. You may update it to your needs.
In the `models` parameter, fill in the following parameters for each model:
| Parameter | Description | Default value |
| ---------------- | ------------------------------------------------------------------------------------------------------------------ | ----------------------------- |
| `model_name` | Name of the model |**Required**|
| `title` | Title of the model (supports Markdown) | **Required**|
| `description` | Description of the model (supports Markdown) | **Required**|
| `classes_colors` | List with the colors of the classes | **Required**|
A ready-to-use configuration file is available in `hugging_face/config.yaml`. You may update it to suit your needs.
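For instance, a single-model configuration could look like the following sketch (values are illustrative; the complete file lives in `hugging_face/config.yaml` and is shown further down):

```yaml
title: Teklia - Doc-UFCN Demo
description: >-
  Document Layout Analysis on historical documents.
examples:
  - hugging_face/resource/hugging_face_1.jpg
  - hugging_face/resource/hugging_face_2.jpg
models:
  - model_name: doc-ufcn-generic-historical-line
    title: Doc-UFCN Generic historical line detection
    description: >-
      The generic historical line detection model predicts text lines from document images.
    classes_colors:
      - green
```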
Once the configuration file is ready, just run the following command:
```shell
$ python3 hugging_face/app.py --config hugging_face/config.json
$ python3 hugging_face/app.py --config hugging_face/config.yaml
```
If the `--public` argument is specified, the script will generate a public, shareable link that you can send to anyone. More information about public links is available in the [Gradio documentation](https://gradio.app/sharing-your-app/).
@@ -330,7 +338,7 @@ If the `--public` argument is specified, the script will generate a public, shareable
To generate the shareable link, use the following command:
```shell
$ python3 hugging_face/app.py --config hugging_face/config.json --public
$ python3 hugging_face/app.py --config hugging_face/config.yaml --public
```
The model's predictions will be shown on the application in JSON format:
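The exact content depends on the selected model and image; as an illustration only (assuming a `polygon` key holding the detected coordinates, and with made-up class names and values), the output may look like:

```json
{
  "1": {
    "polygon": [[101, 45], [980, 45], [980, 78], [101, 78]],
    "confidence": 0.97,
    "channel": "text_line"
  },
  "2": {
    "polygon": [[102, 90], [978, 90], [978, 124], [102, 124]],
    "confidence": 0.93,
    "channel": "text_line"
  }
}
```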
@@ -2,23 +2,22 @@
import argparse
import json
import os
from pathlib import Path
import gradio as gr
import numpy as np
from PIL import Image, ImageDraw
from doc_ufcn import models
from doc_ufcn.main import DocUFCN
from hugging_face.config import parse_configurations
from hugging_face.tools import UFCNModel
# Parse the CLI arguments
parser = argparse.ArgumentParser(description="UFCN HuggingFace app")
parser.add_argument(
"--config",
type=Path,
required=True,
help="The JSON-formatted configuration file of the Hugging Face app",
help="The YAML-formatted configuration file of the Hugging Face app",
)
# Get the application's public mode (local or with sharing)
@@ -34,37 +33,47 @@ args = parser.parse_args()
# Load the config
config = parse_configurations(args.config)
# Download the model
model_path, parameters = models.download_model(name=config["model_name"])
# Store classes_colors list
classes_colors = config["classes_colors"]
# Check that the paths of the examples are valid
for example in config["examples"]:
assert Path.exists(
Path(example)
), f"The path of the image '{example}' does not exist."
# Cached models, maps model_name to UFCNModel object
MODELS = {
model["model_name"]: UFCNModel(
name=model["model_name"],
colors=model["classes_colors"],
title=model["title"],
description=model["description"],
)
for model in config["models"]
}
# Store classes
classes = parameters["classes"]
# Create a list of model names
models_name = list(MODELS)
# Check that the number of colors is equal to the number of classes -1
assert len(classes) - 1 == len(
classes_colors
), f"The parameter classes_colors was filled with the wrong number of colors. {len(classes)-1} colors are expected instead of {len(classes_colors)}."
# Check that the paths of the examples are valid
for example in config["examples"]:
assert os.path.exists(example), f"The path of the image '{example}' does not exist."
def load_model(model_name) -> UFCNModel:
"""
Retrieve the model, and load its parameters/files if it wasn't done before.
# Load the model
model = DocUFCN(
no_of_classes=len(classes),
model_input_size=parameters["input_size"],
device="cpu",
)
model.load(model_path=model_path, mean=parameters["mean"], std=parameters["std"])
:param model_name: The name of the selected model
:return: The UFCNModel instance selected
"""
assert model_name in MODELS
model = MODELS[model_name]
# Load the model's files if it wasn't done before
if not model.loaded:
model.load()
return model
def query_image(image):
def query_image(model_name: gr.Dropdown, image: gr.Image) -> tuple[Image.Image, str]:
"""
Draws the predicted polygons with the color provided by the model on an image
Loads a model and draws the predicted polygons with the color provided by the model on an image
:param model_name: The name of the model selected in the dropdown
:param image: An image to predict
:return: Image and dict, an image with the predictions and a
dictionary mapping an object idx (starting from 1) to a dictionary describing the detected object:
@@ -73,8 +82,11 @@ def query_image(image):
- `channel` key : str, the name of the predicted class.
"""
# Load the selected model, which gives access to its classes and colors
ufcn_model = load_model(model_name)
# Make a prediction with the model
detected_polygons, probabilities, mask, overlap = model.predict(
detected_polygons, probabilities, mask, overlap = ufcn_model.model.predict(
input_image=image, raw_output=True, mask_output=True, overlap_output=True
)
@@ -89,12 +101,12 @@ def query_image(image):
# Create the polygons on the copy of the image for each class with the corresponding color
# We do not draw polygons of the background channel (channel 0)
for channel in range(1, len(classes)):
for channel in range(1, ufcn_model.num_channels):
for i, polygon in enumerate(detected_polygons[channel]):
# Draw the polygons on the image copy.
# Loop through the class_colors list (channel 1 has color 0)
ImageDraw.Draw(img2).polygon(
polygon["polygon"], fill=classes_colors[channel - 1]
polygon["polygon"], fill=ufcn_model.colors[channel - 1]
)
# Build the dictionary
@@ -107,12 +119,22 @@ def query_image(image):
# Confidence that the model predicts the polygon in the right place
"confidence": polygon["confidence"],
# The channel on which the polygon is predicted
"channel": classes[channel],
"channel": ufcn_model.classes[channel],
}
)
# Return the blend of the images and the dictionary formatted in json
return Image.blend(image, img2, 0.5), json.dumps(predict, indent=20)
return Image.blend(image, img2, 0.5), json.dumps(predict, indent=2)
def update_model(model_name: gr.Dropdown) -> tuple[str, str]:
"""
Update the model title and description to those of the selected model
:param model_name: The name of the selected model
:return: The new title and description of the selected model
"""
return f"## {MODELS[model_name].title}", MODELS[model_name].description
with gr.Blocks() as process_image:
@@ -122,6 +144,21 @@ with gr.Blocks() as process_image:
# Create app description
gr.Markdown(config["description"])
# Create dropdown button
model_name = gr.Dropdown(models_name, value=models_name[0], label="Models")
# Get the initially selected model
selected_model: UFCNModel = MODELS[model_name.value]
# Create model title
model_title = gr.Markdown(f"## {selected_model.title}")
# Create model description
model_description = gr.Markdown(selected_model.description)
# Update the model title and description when the selected model changes
model_name.change(update_model, model_name, [model_title, model_description])
# Create a first row of blocks
with gr.Row():
# Create a column on the left
@@ -139,19 +176,20 @@ with gr.Blocks() as process_image:
# Create a row under the buttons
with gr.Row():
# Generate example images that can be used as input image
examples = gr.Examples(inputs=image, examples=config["examples"])
# Generate example images that can be used as input image for every model
gr.Examples(config["examples"], inputs=image)
# Create a column on the right
with gr.Column():
# Generates an output image that does not support upload
image_output = gr.Image(interactive=False)
with gr.Row():
# Generates an output image that does not support upload
image_output = gr.Image(interactive=False)
# Create a row under the predicted image
with gr.Row():
# Create a column so that the JSON output doesn't take the full size of the page
with gr.Column():
# Create a collapsible region
# Create a collapsible region
with gr.Accordion("JSON"):
# Generates a json with the model predictions
json_output = gr.JSON()
@@ -164,7 +202,9 @@ with gr.Blocks() as process_image:
)
# Create the button to submit the prediction
submit_button.click(query_image, inputs=image, outputs=[image_output, json_output])
submit_button.click(
query_image, inputs=[model_name, image], outputs=[image_output, json_output]
)
# Launch the application with the public mode (True or False)
process_image.launch(share=args.public)
{
"model_name": "doc-ufcn-generic-historical-line",
"classes_colors": ["green"],
"title":"doc-ufcn Line Detection Demo",
"description":"A demo showing a prediction from the [Teklia/doc-ufcn-generic-historical-line](https://huggingface.co/Teklia/doc-ufcn-generic-historical-line) model. The generic historical line detection model predicts text lines from document images.",
"examples":[
"hugging_face/resource/hugging_face_1.jpg",
"hugging_face/resource/hugging_face_2.jpg"
]
}
\ No newline at end of file
@@ -7,21 +7,22 @@ from teklia_toolbox.config import ConfigParser
def parse_configurations(config_path: Path):
"""
Parse multiple JSON configuration files into a single source
Parse multiple YAML configuration files into a single source
of configuration for the HuggingFace app
:param config_path: pathlib.Path, Path to the .json config file
:param config_path: pathlib.Path, Path to the .yaml config file
:return: dict, containing the configuration. Ensures config is complete and with correct typing
"""
parser = ConfigParser()
parser.add_option(
"model_name", type=str, default="doc-ufcn-generic-historical-line"
)
parser.add_option("classes_colors", type=list, default=["green"])
parser.add_option("title", type=str)
parser.add_option("description", type=str)
parser.add_option("title")
parser.add_option("description")
parser.add_option("examples", type=list)
model_parser = parser.add_subparser("models", many=True)
model_parser.add_option("model_name")
model_parser.add_option("title")
model_parser.add_option("description")
model_parser.add_option("classes_colors", type=list)
return parser.parse(config_path)
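For reference, here is a minimal sketch of how this helper is consumed; it mirrors the call already made in `hugging_face/app.py`:

```python
from pathlib import Path

from hugging_face.config import parse_configurations

# Parse and validate the YAML configuration of the demo
config = parse_configurations(Path("hugging_face/config.yaml"))

# The parsed configuration is a plain dict
print(config["title"])
print([model["model_name"] for model in config["models"]])
```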
---
title: Teklia - Doc-UFCN Demo
description: >-
[TEKLIA](https://teklia.com/)’s Document Layout Analysis on historical documents. For modern documents, see [ocelus.teklia.com](https://ocelus.teklia.com).
examples:
- hugging_face/resource/hugging_face_1.jpg
- hugging_face/resource/hugging_face_2.jpg
- hugging_face/resource/hugging_face_3.jpg
- hugging_face/resource/hugging_face_4.jpg
models:
- model_name: doc-ufcn-generic-historical-line
title: Doc-UFCN Generic historical line detection
description: >-
The [generic historical line detection model](https://huggingface.co/Teklia/doc-ufcn-generic-historical-line) predicts text lines from document images. Please select an image from the examples below or upload your own image!
classes_colors:
- green
- model_name: doc-ufcn-huginmunin-line
title: Doc-UFCN Hugin-Munin line detection
description: >-
The [Hugin-Munin line detection model](https://huggingface.co/Teklia/doc-ufcn-huginmunin-line) predicts horizontal and vertical text lines from Hugin-Munin document images. Please select an image from the examples below or upload your own image!
classes_colors:
- green
- blue
- model_name: doc-ufcn-generic-page
title: Doc-UFCN Generic page detection
description: >-
The [generic page detection model](https://huggingface.co/Teklia/doc-ufcn-generic-page) predicts single pages from document images. Please select an image from the examples below or upload your own image!
classes_colors:
- green
Image assets updated in `hugging_face/resource/`:

- `demo_doc_ufcn_hf.png`: added (1.14 MiB)
- `hf_doc_ufcn_line_hist.png`: added (1.09 MiB)
- `hugging_face_1.jpg`: replaced (499 KiB to 468 KiB)
- `hugging_face_2.jpg`: replaced (404 KiB to 499 KiB)
- `hugging_face_3.jpg`: added (404 KiB)
- `hugging_face_4.jpg`: added (271 KiB)
# -*- coding: utf-8 -*-
from dataclasses import dataclass, field
from doc_ufcn import models
from doc_ufcn.main import DocUFCN
@dataclass
class UFCNModel:
name: str
colors: list
title: str
description: str
classes: list = field(default_factory=list)
model: DocUFCN = None
def get_class_name(self, channel_idx):
return self.classes[channel_idx]
@property
def loaded(self):
return self.model is not None
@property
def num_channels(self):
return len(self.classes)
def load(self):
# Download the model
model_path, parameters = models.download_model(name=self.name)
# Store classes
self.classes = parameters["classes"]
# Check that the number of colors is equal to the number of classes -1
assert self.num_channels - 1 == len(
self.colors
), f"The parameter classes_colors was filled with the wrong number of colors. {self.num_channels-1} colors are expected instead of {len(self.colors)}."
# Load the model
self.model = DocUFCN(
no_of_classes=len(self.classes),
model_input_size=parameters["input_size"],
device="cpu",
)
self.model.load(
model_path=model_path, mean=parameters["mean"], std=parameters["std"]
)
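To illustrate the dataclass above, here is a minimal standalone usage sketch; the demo itself builds these objects from the YAML configuration rather than hard-coding them, and the field values below are illustrative (taken from `hugging_face/config.yaml`):

```python
from hugging_face.tools import UFCNModel

# Illustrative values mirroring one entry of hugging_face/config.yaml
model = UFCNModel(
    name="doc-ufcn-generic-historical-line",
    colors=["green"],
    title="Doc-UFCN Generic historical line detection",
    description="Predicts text lines from document images.",
)

assert not model.loaded  # nothing downloaded yet
model.load()             # downloads the weights and fills in the classes
print(model.classes, model.num_channels)
```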