import inspect
import os
import pathlib
from enum import Enum
from typing import Any, List, Optional, Type

import pytest

import azure.ai.evaluation as evaluators
from azure.ai.evaluation._legacy._adapters._check import MISSING_LEGACY_SDK


@pytest.fixture
def data_file():
    data_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data")
    return os.path.join(data_path, "evaluate_test_data.jsonl")
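

# Note: each record in evaluate_test_data.jsonl is assumed to carry at least
# "query" and "response" fields; test_load_and_run_evaluators below asserts
# against the corresponding "inputs.query" and "inputs.response" columns.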


def get_evaluators_from_module(namespace: Any, exceptions: Optional[List[str]] = None) -> List[Type]:
    """Collect the evaluator classes exposed by a module, skipping any names listed in ``exceptions``."""
    # Use a distinct local name to avoid shadowing the module-level `evaluators` import alias.
    evaluator_classes = []
    for name, obj in inspect.getmembers(namespace):
        # Evaluators are plain classes; Enum and dict subclasses in the namespace are not evaluators.
        if inspect.isclass(obj) and not issubclass(obj, Enum) and not issubclass(obj, dict):
            if exceptions and name in exceptions:
                continue
            evaluator_classes.append(obj)
    return evaluator_classes


@pytest.mark.unittest
@pytest.mark.skipif(MISSING_LEGACY_SDK, reason="This test has a promptflow dependency")
class TestSaveEval:
    """Test saving evaluators."""

    EVALUATORS = get_evaluators_from_module(
        evaluators,
        exceptions=[
            "AIAgentConverter",
            "RedTeam",
            "RedTeamOutput",
            "AzureOpenAIGrader",
            "AzureOpenAILabelGrader",
            "AzureOpenAIStringCheckGrader",
            "AzureOpenAITextSimilarityGrader",
        ],
    )

    @pytest.mark.parametrize("evaluator", EVALUATORS)
    def test_save_evaluators(self, tmpdir, pf_client, evaluator) -> None:
        """Test that each evaluator can be saved as a flex flow."""
        pf_client.flows.save(evaluator, path=tmpdir)
        # Saving a callable as a flow writes a flow.flex.yaml spec to the target path.
        assert os.path.isfile(os.path.join(tmpdir, "flow.flex.yaml"))

    def test_load_and_run_evaluators(self, tmpdir, pf_client, data_file) -> None:
        """Test that a saved evaluator can be loaded and run against a data file."""
        # Use a test eval because the save/load feature breaks, seemingly in multiple
        # ways, when evaluators have complex imports.
        from test_evaluators.test_inputs_evaluators import EchoEval

        pf_client.flows.save(EchoEval, path=tmpdir)
        run = pf_client.run(tmpdir, data=data_file)
        results_df = pf_client.get_details(run.name)

        assert results_df is not None
        assert all(results_df["outputs.echo_query"] == results_df["inputs.query"])
        assert all(results_df["outputs.echo_response"] == results_df["inputs.response"])