File: test_agent_evaluators.py

package info (click to toggle)
python-azure 20251104%2Bgit-1
  • links: PTS, VCS
  • area: main
  • in suites: forky
  • size: 770,224 kB
  • sloc: python: 6,357,217; ansic: 804; javascript: 287; makefile: 198; sh: 193; xml: 109
file content (105 lines) | stat: -rw-r--r-- 4,316 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import pytest
from azure.ai.evaluation import evaluate, ToolCallAccuracyEvaluator
from azure.ai.evaluation._exceptions import EvaluationException


@pytest.mark.usefixtures("mock_model_config")
@pytest.mark.unittest
class TestEvaluate:
    """Checks on ``ToolCallAccuracyEvaluator`` when required inputs are absent or do not match."""

    @staticmethod
    def _weather_tool_definitions():
        """Build a new single-entry list describing the ``fetch_weather`` tool.

        A fresh structure is created on every call so each evaluator invocation
        receives its own object, just as separate inline literals would.
        """
        location_schema = {
            "type": "string",
            "description": "The location to fetch weather for.",
        }
        fetch_weather = {
            "name": "fetch_weather",
            "description": "Fetches the weather information for the specified location.",
            "parameters": {
                "type": "object",
                "properties": {"location": location_schema},
            },
        }
        return [fetch_weather]

    @staticmethod
    def _assert_not_applicable(outcome, expected_message):
        """Assert that ``outcome`` holds the not-applicable result and that its reason contains ``expected_message``."""
        result_key = ToolCallAccuracyEvaluator._RESULT_KEY
        assert outcome[result_key] == ToolCallAccuracyEvaluator._NOT_APPLICABLE_RESULT
        assert expected_message in outcome[f"{result_key}_reason"]

    def test_tool_call_accuracy_evaluator_missing_inputs(self, mock_model_config):
        """Run four incomplete-input scenarios and expect a not-applicable result with a matching reason for each."""
        evaluator = ToolCallAccuracyEvaluator(model_config=mock_model_config)
        question = "Where is the Eiffel Tower?"

        # Scenario 1: neither tool_calls nor response is supplied.
        outcome = evaluator(
            query=question,
            tool_definitions=self._weather_tool_definitions(),
        )
        self._assert_not_applicable(outcome, ToolCallAccuracyEvaluator._NO_TOOL_CALLS_MESSAGE)

        # Scenario 2: a tool call is supplied but the tool_definitions list is empty.
        tokyo_weather_call = {
            "type": "tool_call",
            "name": "fetch_weather",
            "arguments": {"location": "Tokyo"},
        }
        outcome = evaluator(
            query=question,
            tool_definitions=[],
            tool_calls=[tokyo_weather_call],
        )
        self._assert_not_applicable(outcome, ToolCallAccuracyEvaluator._NO_TOOL_DEFINITIONS_MESSAGE)

        # Scenario 3: a plain-text response that contains no tool calls.
        outcome = evaluator(
            query=question,
            response="The Eiffel Tower is in Paris.",
            tool_definitions=self._weather_tool_definitions(),
        )
        self._assert_not_applicable(outcome, ToolCallAccuracyEvaluator._NO_TOOL_CALLS_MESSAGE)

        # Scenario 4: the called tool's name has no entry in tool_definitions.
        undefined_tool_call = {"type": "tool_call", "name": "some_other_tool", "arguments": {}}
        outcome = evaluator(
            query=question,
            tool_calls=[undefined_tool_call],
            tool_definitions=self._weather_tool_definitions(),
        )
        self._assert_not_applicable(outcome, ToolCallAccuracyEvaluator._TOOL_DEFINITIONS_MISSING_MESSAGE)