File: sample_manage_classifiers.py

package info (click to toggle)
python-azure 20250603%2Bgit-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 851,724 kB
  • sloc: python: 7,362,925; ansic: 804; javascript: 287; makefile: 195; sh: 145; xml: 109
file content (125 lines) | stat: -rw-r--r-- 5,391 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# coding: utf-8

# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

"""
FILE: sample_manage_classifiers.py

DESCRIPTION:
    This sample demonstrates how to manage the classifiers on your account. To learn
    how to build a classifier, look at sample_build_classifier.py.

USAGE:
    python sample_manage_classifiers.py

    Set the environment variables with your own values before running the sample:
    1) DOCUMENTINTELLIGENCE_ENDPOINT - the endpoint to your Document Intelligence resource.
    2) DOCUMENTINTELLIGENCE_API_KEY - your Document Intelligence API key.
    3) DOCUMENTINTELLIGENCE_TRAINING_DATA_CLASSIFIER_SAS_URL - the shared access signature (SAS) URL of your Azure Blob Storage container.
"""

import os


def sample_manage_classifiers():
    """Build, list, fetch, and delete a document classifier.

    Reads ``DOCUMENTINTELLIGENCE_ENDPOINT``, ``DOCUMENTINTELLIGENCE_API_KEY`` and
    ``DOCUMENTINTELLIGENCE_TRAINING_DATA_CLASSIFIER_SAS_URL`` from the environment,
    builds a classifier with two document types from that blob container, prints
    its details, lists every classifier returned by the service, retrieves the
    newly built classifier by ID, deletes it, and finally checks that a follow-up
    lookup raises ``ResourceNotFoundError``.

    Raises:
        KeyError: If any of the required environment variables is not set.
    """
    # [START build_classifier]
    import uuid
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.documentintelligence import DocumentIntelligenceAdministrationClient
    from azure.ai.documentintelligence.models import (
        AzureBlobContentSource,
        ClassifierDocumentTypeDetails,
        BuildDocumentClassifierRequest,
        DocumentClassifierDetails,
    )

    endpoint = os.environ["DOCUMENTINTELLIGENCE_ENDPOINT"]
    key = os.environ["DOCUMENTINTELLIGENCE_API_KEY"]
    container_sas_url = os.environ["DOCUMENTINTELLIGENCE_TRAINING_DATA_CLASSIFIER_SAS_URL"]

    document_intelligence_admin_client = DocumentIntelligenceAdministrationClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )

    # build a document classifier
    poller = document_intelligence_admin_client.begin_build_classifier(
        BuildDocumentClassifierRequest(
            classifier_id=str(uuid.uuid4()),
            doc_types={
                "IRS-1040-A": ClassifierDocumentTypeDetails(
                    azure_blob_source=AzureBlobContentSource(container_url=container_sas_url, prefix="IRS-1040-A/train")
                ),
                "IRS-1040-D": ClassifierDocumentTypeDetails(
                    azure_blob_source=AzureBlobContentSource(container_url=container_sas_url, prefix="IRS-1040-D/train")
                ),
            },
            description="IRS document classifier",
        )
    )
    classifier: DocumentClassifierDetails = poller.result()
    print(f"Built classifier with ID: {classifier.classifier_id}")
    print(f"API version used to build the classifier model: {classifier.api_version}")
    print(f"Classifier description: {classifier.description}")
    print("Document classes used for training the model:")
    for doc_type, details in classifier.doc_types.items():
        print(f"Document type: {doc_type}")
        if details.azure_blob_source:
            print(f"Container source: {details.azure_blob_source.container_url}\n")
    # [END build_classifier]

    # Next, we get a paged list of all of our document classifiers
    # [START list_classifiers]
    classifiers = document_intelligence_admin_client.list_classifiers()

    print("We have the following 'ready' models with IDs and descriptions:")
    # Use a dedicated loop variable: reusing the name `classifier` here would
    # rebind it to the last listed item, and the get/delete calls below would
    # then target an arbitrary classifier instead of the one built above.
    for listed_classifier in classifiers:
        print(f"{listed_classifier.classifier_id} | {listed_classifier.description}")
    # [END list_classifiers]

    # [START get_classifier]
    my_classifier = document_intelligence_admin_client.get_classifier(classifier_id=classifier.classifier_id)
    print(f"\nClassifier ID: {my_classifier.classifier_id}")
    print(f"Description: {my_classifier.description}")
    print(f"Classifier created on: {my_classifier.created_date_time}")
    print(f"Classifier expires on: {my_classifier.expiration_date_time}")
    # [END get_classifier]

    # Finally, we will delete this classifier by ID
    # [START delete_classifier]
    document_intelligence_admin_client.delete_classifier(classifier_id=my_classifier.classifier_id)
    # [END delete_classifier]

    from azure.core.exceptions import ResourceNotFoundError

    # Looking up the deleted classifier is expected to fail; that failure is
    # what confirms the deletion.
    try:
        document_intelligence_admin_client.get_classifier(classifier_id=my_classifier.classifier_id)
    except ResourceNotFoundError:
        print(f"Successfully deleted classifier with ID {my_classifier.classifier_id}")


if __name__ == "__main__":
    from azure.core.exceptions import HttpResponseError
    from dotenv import find_dotenv, load_dotenv

    try:
        # Populate the environment from a .env file (if one is found), then run the sample.
        load_dotenv(find_dotenv())
        sample_manage_classifiers()
    except HttpResponseError as error:
        # Examples of how to inspect an HttpResponseError before re-raising it.
        inner_error = error.error
        if inner_error is None:
            # Without an inner error, the message text is the only place to
            # look for more information.
            if "invalid request" in error.message.casefold():
                print(f"Uh-oh! Seems there was an invalid request: {error}")
        elif inner_error.code == "InvalidImage":
            # Check by error code.
            print(f"Received an invalid image error: {inner_error}")
        elif inner_error.code == "InvalidRequest":
            print(f"Received an invalid request error: {inner_error}")
        # Raise the error again after any diagnostics have been printed.
        raise