File: sample_get_bounding_boxes.py

package info (click to toggle)
python-azure 20250603%2Bgit-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 851,724 kB
  • sloc: python: 7,362,925; ansic: 804; javascript: 287; makefile: 195; sh: 145; xml: 109
file content (141 lines) | stat: -rw-r--r-- 7,004 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# coding: utf-8

# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

"""
FILE: sample_get_bounding_boxes.py

DESCRIPTION:
    This sample demonstrates how to get detailed information to visualize the outlines of
    form content and fields, which can be used for manual validation and drawing UI as part of an application.

    The model used in this sample can be created in the sample_train_model_without_labels.py using the
    training files in https://aka.ms/azsdk/formrecognizer/sampletrainingfiles-v3.1

USAGE:
    python sample_get_bounding_boxes.py

    Set the environment variables with your own values before running the sample:
    1) AZURE_FORM_RECOGNIZER_ENDPOINT - the endpoint to your Form Recognizer resource.
    2) AZURE_FORM_RECOGNIZER_KEY - your Form Recognizer API key
    3) CUSTOM_TRAINED_MODEL_ID - the ID of your custom trained model
        -OR-
       CONTAINER_SAS_URL_V2 - the shared access signature (SAS) URL of your Azure Blob Storage container with your forms.
       A model will be trained and used to run the sample.
"""

import os


def format_bounding_box(bounding_box):
    """Render a bounding box as a comma-separated string of "[x, y]" points.

    The points are listed in clockwise order: top-left, top-right,
    bottom-right, bottom-left.

    :param bounding_box: Iterable of points exposing ``x`` and ``y``
        attributes, or a falsy value (``None`` / empty) when unavailable.
    :return: The formatted points, or "N/A" when no bounding box is given.
    """
    if bounding_box:
        corners = ("[{}, {}]".format(point.x, point.y) for point in bounding_box)
        return ", ".join(corners)
    return "N/A"


class GetBoundingBoxesSample(object):
    """Sample that recognizes a form with a custom model and prints the
    bounding boxes of its fields, table cells and their field elements.
    """

    def get_bounding_boxes(self, custom_model_id):
        """Recognize the sample form and print its content with bounding boxes.

        Reads the endpoint and API key from the AZURE_FORM_RECOGNIZER_ENDPOINT
        and AZURE_FORM_RECOGNIZER_KEY environment variables.

        :param custom_model_id: Model ID to use when the
            CUSTOM_TRAINED_MODEL_ID environment variable is not set.
        :raises KeyError: If the endpoint or key environment variable is missing.
        """
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer import FormRecognizerClient

        endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
        key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
        # The environment variable, when set, takes precedence over the argument.
        model_id = os.getenv("CUSTOM_TRAINED_MODEL_ID", custom_model_id)

        form_recognizer_client = FormRecognizerClient(
            endpoint=endpoint, credential=AzureKeyCredential(key)
        )

        # Resolves to <parent of this file's directory>/sample_forms/forms/Form_1.jpg
        path_to_sample_forms = os.path.abspath(os.path.join(os.path.abspath(__file__),
                                                            "..", "..", "./sample_forms/forms/Form_1.jpg"))
        # Make sure your form's type is included in the list of form types the custom model can recognize
        with open(path_to_sample_forms, "rb") as f:
            poller = form_recognizer_client.begin_recognize_custom_forms(
                model_id=model_id, form=f, include_field_elements=True
            )
        forms = poller.result()

        for idx, form in enumerate(forms):
            print("--------RECOGNIZING FORM #{}--------".format(idx+1))
            print("Form has type: {}".format(form.form_type))
            for name, field in form.fields.items():
                # each field is of type FormField
                # value_data is guarded the same way as label_data so that a
                # field without value data prints "N/A" instead of raising.
                value_data = field.value_data
                print("...Field '{}' has label '{}' with value '{}' within bounding box '{}', with a confidence score of {}".format(
                    name,
                    field.label_data.text if field.label_data else name,
                    field.value,
                    format_bounding_box(value_data.bounding_box) if value_data else "N/A",
                    field.confidence
                ))
            for page in form.pages:
                print("-------Recognizing Page #{} of Form #{}-------".format(page.page_number, idx+1))
                print("Page has width '{}' and height '{}' measure with unit: {}, and has text angle '{}'".format(
                    page.width, page.height, page.unit, page.text_angle
                ))
                for table in page.tables:
                    print("Table on page has the following cells:")
                    for cell in table.cells:
                        print("...Cell[{}][{}] has text '{}' with confidence {} based on the following words: ".format(
                            cell.row_index, cell.column_index, cell.text, cell.confidence
                        ))
                        # field_elements is only populated if you set include_field_elements=True
                        # It is a heterogeneous list of FormWord, FormLine, and FormSelectionMark
                        # Defensive guard: treat a missing (None) list as empty.
                        for element in cell.field_elements or []:
                            self._print_field_element(element)
                print("---------------------------------------------------")
            print("-----------------------------------")

    @staticmethod
    def _print_field_element(element):
        """Print one field element (word, line or selection mark) with its bounding box.

        Elements of any other kind are ignored.
        """
        if element.kind == "word":
            print("......Word '{}' within bounding box '{}' has a confidence of {}".format(
                element.text,
                format_bounding_box(element.bounding_box),
                element.confidence
            ))
        elif element.kind == "line":
            print("......Line '{}' within bounding box '{}' has the following words: ".format(
                element.text,
                format_bounding_box(element.bounding_box)
            ))
            for word in element.words:
                print(".........Word '{}' within bounding box '{}' has a confidence of {}".format(
                    word.text,
                    format_bounding_box(word.bounding_box),
                    word.confidence
                ))
        elif element.kind == "selectionMark":
            print("......Selection mark is '{}' within bounding box '{}' "
                  "and has a confidence of {}".format(
                    element.state,
                    format_bounding_box(element.bounding_box),
                    element.confidence
                    ))


if __name__ == '__main__':
    # When a training container SAS URL is provided, train a model without
    # labels first and hand its ID to the sample; otherwise pass None.
    trained_model_id = None
    sas_url = os.getenv("CONTAINER_SAS_URL_V2")
    if sas_url:
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer import FormTrainingClient

        endpoint = os.getenv("AZURE_FORM_RECOGNIZER_ENDPOINT")
        key = os.getenv("AZURE_FORM_RECOGNIZER_KEY")

        # Fail early with a readable message before contacting the service.
        if not (endpoint and key):
            raise ValueError("Please provide endpoint and API key to run the samples.")

        training_client = FormTrainingClient(
            endpoint=endpoint, credential=AzureKeyCredential(key)
        )
        training_poller = training_client.begin_training(sas_url, use_training_labels=False)
        trained_model_id = training_poller.result().model_id

    GetBoundingBoxesSample().get_bounding_boxes(trained_model_id)