File: test_aoai_integration_features.py

# ------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------

import pytest

from azure.ai.evaluation._evaluate._evaluate_aoai import (
    _split_evaluators_and_grader_configs,
    _convert_remote_eval_params_to_grader
)

from azure.ai.evaluation import F1ScoreEvaluator
from azure.ai.evaluation import (
    AzureOpenAIGrader,
    AzureOpenAITextSimilarityGrader,
    AzureOpenAILabelGrader,
    AzureOpenAIStringCheckGrader,
)
from azure.ai.evaluation import AzureOpenAIModelConfiguration


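# Placeholder values suffice for these unit tests: the graders are only constructed
# and inspected, never used to call a live Azure OpenAI endpoint.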
@pytest.fixture
def mock_aoai_model_config():
    return AzureOpenAIModelConfiguration(
        azure_deployment="...",
        azure_endpoint="...",
        api_key="...",
        api_version="...",
    )


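# An empty grader config is enough here; the base AzureOpenAIGrader does not currently
# validate grader_config contents (see the TODO in test_grader_initialization).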
@pytest.fixture
def mock_grader_config():
    return {}

def _get_file(name):
    """Get the file from the unittest data folder."""
    import os
    import pathlib
    data_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data")
    return os.path.join(data_path, name)

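# Fixture resolving the questions.jsonl test data file; not referenced by the tests
# in this file.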
@pytest.fixture
def questions_file():
    return _get_file("questions.jsonl")

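# Trivial stand-in evaluator callable; not referenced by the tests in this file.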
def simple_eval_function():
    return "123"

@pytest.mark.unittest
class TestAoaiIntegrationFeatures:
    def test_remote_eval_grader_generation(self, mock_aoai_model_config, mock_grader_config):
        """
        Test to ensure that the AoaiGrader class and its children validate their inputs
        properly.
        """
        # Needs a model config
        init_params = {}
        with pytest.raises(Exception) as excinfo:
            _convert_remote_eval_params_to_grader("", init_params=init_params)
        assert "Grader converter needs a valid 'model_config' key in init_params." in str(excinfo.value)

        # Needs an ID
        init_params["model_config"] = mock_aoai_model_config
        init_params["grader_config"] = mock_grader_config
        
        with pytest.raises(Exception) as excinfo:
            _convert_remote_eval_params_to_grader("invalid id", init_params=init_params)
        assert "not recognized as an AOAI grader ID" in str(excinfo.value)

        # Test general grader creation
        grader = _convert_remote_eval_params_to_grader(AzureOpenAIGrader.id, init_params=init_params)
        assert isinstance(grader, AzureOpenAIGrader)
        assert grader._model_config == mock_aoai_model_config
        assert grader._grader_config == mock_grader_config

        # Test text similarity creation
        init_params = {
            "model_config": mock_aoai_model_config,
            "evaluation_metric": "fuzzy_match",
            "input": "...",
            "pass_threshold": 0.5,
            "reference": "...",
            "name": "test",
        }
        grader = _convert_remote_eval_params_to_grader(AzureOpenAITextSimilarityGrader.id, init_params=init_params)
        assert isinstance(grader, AzureOpenAITextSimilarityGrader)
        assert grader._model_config == mock_aoai_model_config

        # Test string check creation
        init_params = {
            "model_config": mock_aoai_model_config,
            "input": "...",
            "name": "test",
            "operation": "eq",
            "reference": "...",
        }
        grader = _convert_remote_eval_params_to_grader(AzureOpenAIStringCheckGrader.id, init_params=init_params)
        assert isinstance(grader, AzureOpenAIStringCheckGrader)
        assert grader._model_config == mock_aoai_model_config

        # Test label creation
        init_params = {
            "model_config": mock_aoai_model_config,
            "input": [{"content": "...", "role": "user"}],
            "name": "test",
            "labels": ["label1", "label2"],
            "model": "gpt-35-turbo",
            "passing_labels": ["label1"],
        }
        grader = _convert_remote_eval_params_to_grader(AzureOpenAILabelGrader.id, init_params=init_params)
        assert isinstance(grader, AzureOpenAILabelGrader)
        assert grader._model_config == mock_aoai_model_config

    def test_grader_initialization(self, mock_aoai_model_config, mock_grader_config):
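        """
        Test that AzureOpenAIGrader construction validates the supplied model_config,
        and that validation can be skipped with validate=False.
        """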
        bad_model_config = AzureOpenAIModelConfiguration(
            azure_deployment="...",
            azure_endpoint="...",
        )
        bad_grader_config = {}

        # Test with fully valid inputs
        AzureOpenAIGrader(
            model_config=mock_aoai_model_config,
            grader_config=mock_grader_config
        )

        # Missing api_key in the model config should raise an error
        with pytest.raises(Exception) as excinfo:
            AzureOpenAIGrader(
                model_config=bad_model_config,
                grader_config=mock_grader_config
            )
        assert "Requires an api_key in the supplied model_config." in str(excinfo.value)

        # Test that validation bypass works to simplify other tests
        AzureOpenAIGrader(
            model_config=bad_model_config,
            grader_config=bad_grader_config,
            validate=False
        )

        # TODO add checks for bad grader config... maybe.
        # Need to decide if we really want grader validation at base grader level.

    def test_evaluate_grader_recognition(self, mock_aoai_model_config, mock_grader_config):
        """
        Test that checks the ability of the _split_evaluators_and_grader_configs
        method to correctly ID and separate normal, callable evaluators, and
        AOAI graders.
        """
        built_in_eval = F1ScoreEvaluator()
        custom_eval = lambda x: x
        aoai_grader = AzureOpenAIGrader(model_config=mock_aoai_model_config, grader_config=mock_grader_config)
        evaluators = {
            "f1_score": built_in_eval,
            "custom_eval": custom_eval,
            "aoai_grader": aoai_grader
        }

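        # The splitter should route the AOAI grader into its own dict and leave
        # the two callable evaluators untouched.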
        just_evaluators, aoai_graders = _split_evaluators_and_grader_configs(evaluators)

        assert len(just_evaluators) == 2
        assert len(aoai_graders) == 1

        assert "f1_score" in just_evaluators
        assert "custom_eval" in just_evaluators
        assert "aoai_grader" in aoai_graders