# pylint: disable=line-too-long,useless-suppression
# ------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------
"""
DESCRIPTION:
Given an AIProjectClient, this sample demonstrates how to use the synchronous
`.evaluations` methods to create, get and list evaluations. It uses additional
Azure OpenAI graders for evaluation.
USAGE:
python sample_evaluations_aoai_graders.py
Before running the sample:
pip install azure-ai-projects azure-identity
Set these environment variables with your own values:
1) PROJECT_ENDPOINT - Required. The Azure AI Project endpoint, as found in the overview page of your
Azure AI Foundry project. It has the form: https://<account_name>.services.ai.azure.com/api/projects/<project_name>.
2) CONNECTION_NAME - Required. The name of the connection of type Azure Storage Account, to use for the dataset upload.
3) MODEL_ENDPOINT - Required. The Azure OpenAI endpoint associated with your Foundry project.
It can be found in the Foundry overview page. It has the form https://<account_name>.openai.azure.com.
4) MODEL_API_KEY - Required. The API key for the model endpoint. Can be found under "key" in the model details page
(click "Models + endpoints" and select your model to get to the model details page).
5) MODEL_DEPLOYMENT_NAME - Required. The name of the model deployment to use for evaluation.
6) DATASET_NAME - Optional. The name of the Dataset to create and use in this sample.
7) DATASET_VERSION - Optional. The version of the Dataset to create and use in this sample.
8) DATA_FOLDER - Optional. The folder path where the data files for upload are located.
"""

import os

from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import (
    Evaluation,
    InputDataset,
    EvaluatorConfiguration,
    EvaluatorIds,
    DatasetVersion,
)

endpoint = os.environ[
    "PROJECT_ENDPOINT"
]  # Sample: https://<account_name>.services.ai.azure.com/api/projects/<project_name>
connection_name = os.environ["CONNECTION_NAME"]
model_endpoint = os.environ["MODEL_ENDPOINT"]  # Sample: https://<account_name>.openai.azure.com
model_api_key = os.environ["MODEL_API_KEY"]
model_deployment_name = os.environ["MODEL_DEPLOYMENT_NAME"]  # Sample: gpt-4o-mini

dataset_name = os.environ.get("DATASET_NAME", "dataset-test")
dataset_version = os.environ.get("DATASET_VERSION", "1.0")

# Construct the paths to the data folder and data file used in this sample
script_dir = os.path.dirname(os.path.abspath(__file__))
data_folder = os.environ.get("DATA_FOLDER", os.path.join(script_dir, "data_folder"))
data_file = os.path.join(data_folder, "sample_data_evaluation.jsonl")
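
# NOTE (illustrative, not part of the service contract): based on the data_mapping and the
# grader templates used below, each JSONL row in sample_data_evaluation.jsonl is assumed to
# contain at least "query" and "response" fields, for example:
#   {"query": "What is the capital of France?", "response": "Paris"}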

with DefaultAzureCredential(exclude_interactive_browser_credential=False) as credential:

    with AIProjectClient(endpoint=endpoint, credential=credential) as project_client:

        # Upload the local JSONL file as a new Dataset version in the project
        dataset: DatasetVersion = project_client.datasets.upload_file(
            name=dataset_name,
            version=dataset_version,
            file_path=data_file,
            connection_name=connection_name,
        )
        print(dataset)
print("Create an evaluation")
evaluation: Evaluation = Evaluation(
display_name="Sample Evaluation Test",
description="Sample evaluation for testing",
# Sample Dataset Id : azureai://accounts/<account_name>/projects/<project_name>/data/<dataset_name>/versions/<version>
data=InputDataset(id=dataset.id if dataset.id else ""),
evaluators={
"relevance": EvaluatorConfiguration(
id=EvaluatorIds.RELEVANCE.value,
init_params={
"deployment_name": model_deployment_name,
},
data_mapping={
"query": "${data.query}",
"response": "${data.response}",
},
),
"violence": EvaluatorConfiguration(
id=EvaluatorIds.VIOLENCE.value,
init_params={
"azure_ai_project": endpoint,
},
),
"bleu_score": EvaluatorConfiguration(
id=EvaluatorIds.BLEU_SCORE.value,
),
"string_check": EvaluatorConfiguration(
id=EvaluatorIds.STRING_CHECK_GRADER.value,
init_params={
"input": "{{item.query}}",
"name": "starts with what is",
"operation": "like",
"reference": "What is",
"deployment_name": model_deployment_name,
},
),
"label_model": EvaluatorConfiguration(
id=EvaluatorIds.LABEL_GRADER.value,
init_params={
"input": [{"content": "{{item.query}}", "role": "user"}],
"labels": ["too short", "just right", "too long"],
"passing_labels": ["just right"],
"model": model_deployment_name,
"name": "label",
"deployment_name": model_deployment_name,
},
),
"text_similarity": EvaluatorConfiguration(
id=EvaluatorIds.TEXT_SIMILARITY_GRADER.value,
init_params={
"evaluation_metric": "fuzzy_match",
"input": "{{item.query}}",
"name": "similarity",
"pass_threshold": 1,
"reference": "{{item.query}}",
"deployment_name": model_deployment_name,
},
),
"general": EvaluatorConfiguration(
id=EvaluatorIds.GENERAL_GRADER.value,
init_params={
"deployment_name": model_deployment_name,
"grader_config": {
"input": "{{item.query}}",
"name": "contains hello",
"operation": "like",
"reference": "hello",
"type": "string_check",
},
},
),
},
)

        evaluation_response: Evaluation = project_client.evaluations.create(
            evaluation,
            headers={
                # The Azure OpenAI endpoint and API key are passed as request headers;
                # they are used to call your model deployment during evaluation.
                "model-endpoint": model_endpoint,
                "model-api-key": model_api_key,
            },
        )
        print(evaluation_response)
print("Get evaluation")
get_evaluation_response: Evaluation = project_client.evaluations.get(evaluation_response.name)
print(get_evaluation_response)
print("List evaluations")
for evaluation in project_client.evaluations.list():
print(evaluation)
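
        # OPTIONAL (not part of the original sample): a minimal polling sketch. It assumes the
        # Evaluation model exposes a string `status` property and that the run reaches a terminal
        # state such as "Completed", "Failed", or "Canceled"; adjust the state names to match
        # your service version before enabling this.
        #
        # import time
        #
        # terminal_states = {"Completed", "Failed", "Canceled"}
        # while True:
        #     current = project_client.evaluations.get(evaluation_response.name)
        #     print(f"Evaluation status: {current.status}")
        #     if current.status in terminal_states:
        #         break
        #     time.sleep(30)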