1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
|
# coding: utf-8
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------
"""
FILE: sample_analyze_addon_formulas.py
DESCRIPTION:
This sample demonstrates how to extract all identified formulas, such as mathematical
equations, using the add-on 'FORMULAS' capability.
This sample uses the prebuilt Layout model for the demonstration.
Add-on capabilities accept a list of strings containing values from the `DocumentAnalysisFeature`
enum class. For more information, see:
https://aka.ms/azsdk/python/documentintelligence/analysisfeature.
The following capabilities are free:
- BARCODES
- LANGUAGES
The following capabilities will incur additional charges:
- FORMULAS
- OCR_HIGH_RESOLUTION
- STYLE_FONT
- QUERY_FIELDS
See pricing: https://azure.microsoft.com/pricing/details/ai-document-intelligence/.
USAGE:
python sample_analyze_addon_formulas.py
Set the environment variables with your own values before running the sample:
1) DOCUMENTINTELLIGENCE_ENDPOINT - the endpoint to your Document Intelligence resource.
2) DOCUMENTINTELLIGENCE_API_KEY - your Document Intelligence API key.
"""
import os
def analyze_formulas():
    """Run the layout model with the FORMULAS add-on on the sample PDF and print the results.

    The service endpoint and API key are read from the ``DOCUMENTINTELLIGENCE_ENDPOINT``
    and ``DOCUMENTINTELLIGENCE_API_KEY`` environment variables. The sample document
    ``sample_forms/add_ons/formulas.pdf`` is resolved relative to this file's directory.
    For every page of the result, inline formulas are printed first, followed by
    display formulas, each with its value, confidence and polygon.

    Raises:
        KeyError: If either environment variable is not set.
    """
    # Joining ".." onto the file path and normalizing yields the file's directory.
    this_file = os.path.abspath(__file__)
    path_to_sample_documents = os.path.abspath(
        os.path.join(this_file, "..", "sample_forms/add_ons/formulas.pdf")
    )

    # [START analyze_formulas]
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.documentintelligence import DocumentIntelligenceClient
    from azure.ai.documentintelligence.models import DocumentAnalysisFeature, AnalyzeResult

    def _format_polygon(polygon):
        # A missing or empty polygon is reported as "N/A".
        if not polygon:
            return "N/A"
        # Walk the flat sequence two values at a time and render each pair as "[a, b]".
        pairs = []
        for i in range(0, len(polygon), 2):
            pairs.append(f"[{polygon[i]}, {polygon[i + 1]}]")
        return ", ".join(pairs)

    service_endpoint = os.environ["DOCUMENTINTELLIGENCE_ENDPOINT"]
    api_key = os.environ["DOCUMENTINTELLIGENCE_API_KEY"]
    credential = AzureKeyCredential(api_key)
    document_intelligence_client = DocumentIntelligenceClient(endpoint=service_endpoint, credential=credential)

    # Specify which add-on capabilities to enable
    enabled_features = [DocumentAnalysisFeature.FORMULAS]
    with open(path_to_sample_documents, "rb") as document_stream:
        poller = document_intelligence_client.begin_analyze_document(
            "prebuilt-layout",
            body=document_stream,
            features=enabled_features,
        )
    result: AnalyzeResult = poller.result()

    # Go through the pages one by one; within a page, report the inline formulas
    # first and the display formulas second.
    for page in result.pages:
        print(f"----Formulas detected from page #{page.page_number}----")
        if page.formulas:
            inline_formulas = [item for item in page.formulas if item.kind == "inline"]
            display_formulas = [item for item in page.formulas if item.kind == "display"]

            print(f"Detected {len(inline_formulas)} inline formulas.")
            for formula_idx, formula in enumerate(inline_formulas):
                print(f"- Inline #{formula_idx}: {formula.value}")
                print(f" Confidence: {formula.confidence}")
                print(f" Bounding regions: {_format_polygon(formula.polygon)}")

            print(f"\nDetected {len(display_formulas)} display formulas.")
            for formula_idx, formula in enumerate(display_formulas):
                print(f"- Display #{formula_idx}: {formula.value}")
                print(f" Confidence: {formula.confidence}")
                print(f" Bounding regions: {_format_polygon(formula.polygon)}")

    print("----------------------------------------")
    # [END analyze_formulas]
if __name__ == "__main__":
    from azure.core.exceptions import HttpResponseError
    from dotenv import find_dotenv, load_dotenv

    # Script entry point: load variables from a .env file (if one is found), run the
    # sample, and show a few ways to inspect an HttpResponseError before re-raising it.
    try:
        load_dotenv(find_dotenv())
        analyze_formulas()
    except HttpResponseError as error:
        inner_error = error.error

        if inner_error is None:
            # Without an inner error, the message text can still be checked
            # to get more information.
            if "Invalid request".casefold() in error.message.casefold():
                print(f"Uh-oh! Seems there was an invalid request: {error}")
            # Raise the error again
            raise

        # With an inner error available, check by error code.
        error_code = inner_error.code
        if error_code == "InvalidImage":
            print(f"Received an invalid image error: {error.error}")
        if error_code == "InvalidRequest":
            print(f"Received an invalid request error: {error.error}")
        # Raise the error again after printing it
        raise
|