File: test_conversation_pii_with_entity_mask_policy.py

package info (click to toggle)
python-azure 20251118%2Bgit-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 783,356 kB
  • sloc: python: 6,474,533; ansic: 804; javascript: 287; sh: 205; makefile: 198; xml: 109
file content (153 lines) | stat: -rw-r--r-- 6,709 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# pylint: disable=line-too-long,useless-suppression
import functools
import pytest

from devtools_testutils import AzureRecordedTestCase, EnvironmentVariableLoader, recorded_by_proxy
from azure.ai.language.conversations import ConversationAnalysisClient, AnalyzeConversationLROPoller
from azure.core.paging import ItemPaged
from azure.ai.language.conversations.models import (
    # request models
    AnalyzeConversationOperationInput,
    MultiLanguageConversationInput,
    TextConversation,
    TextConversationItem,
    PiiOperationAction,
    ConversationPiiActionContent,
    ConversationActions,
    AnalyzeConversationOperationResult,
    ConversationPiiOperationResult,
    ConversationalPiiResult,
    ConversationPiiItemResult,
    NamedEntity,
    InputWarning,
    ConversationError,
    AnalyzeConversationOperationAction,
    CharacterMaskPolicyType,
    RedactionCharacter,
    EntityMaskTypePolicyType,
)
from typing import cast, List
import re

from azure.core.credentials import AzureKeyCredential

ConversationsPreparer = functools.partial(
    EnvironmentVariableLoader,
    "conversations",
    conversations_endpoint="https://Sanitized.azure-api.net/",
    conversations_key="fake_key",
)


class TestConversations(AzureRecordedTestCase):

    # Start with any helper functions you might need, for example a client creation method:
    def create_client(self, endpoint, key):
        credential = AzureKeyCredential(key)
        client = ConversationAnalysisClient(endpoint, credential)
        return client

    ...


class TestConversationsCase(TestConversations):
    @ConversationsPreparer()
    @recorded_by_proxy
    def test_conversation_pii_with_entity_mask_policy(self, conversations_endpoint, conversations_key):
        client = self.create_client(conversations_endpoint, conversations_key)

        # Track redacted texts we verify
        redacted_verified: List[str] = []

        # ---- Redaction policy: mask entities as [Category] / [Category-1] ----
        redaction_policy = EntityMaskTypePolicyType()

        # ---- Build input -----------------------------------------------------
        ml_input = MultiLanguageConversationInput(
            conversations=[
                TextConversation(
                    id="1",
                    language="en",
                    conversation_items=[
                        TextConversationItem(id="1", participant_id="Agent_1", text="Can you provide your name?"),
                        TextConversationItem(id="2", participant_id="Customer_1", text="Hi, my name is John Doe."),
                        TextConversationItem(
                            id="3",
                            participant_id="Agent_1",
                            text="Thank you John, that has been updated in our system.",
                        ),
                    ],
                )
            ]
        )

        # Action with EntityMaskTypePolicyType
        pii_action: AnalyzeConversationOperationAction = PiiOperationAction(
            action_content=ConversationPiiActionContent(redaction_policy=redaction_policy),
            name="Conversation PII with Entity Mask Policy",
        )
        actions: List[AnalyzeConversationOperationAction] = [pii_action]

        operation_input = AnalyzeConversationOperationInput(
            conversation_input=ml_input,
            actions=actions,
        )

        # ---- Begin LRO -------------------------------------------------------
        poller: AnalyzeConversationLROPoller[ItemPaged[ConversationActions]] = client.begin_analyze_conversation_job(
            body=operation_input
        )

        print(f"Operation ID: {poller.details.get('operation_id')}")

        # Result: ItemPaged[ConversationActions]
        paged_actions: ItemPaged[ConversationActions] = poller.result()

        # Final-state metadata
        d = poller.details
        print(f"Job ID: {d.get('job_id')}")
        print(f"Status: {d.get('status')}")
        if d.get("errors"):
            print("Errors:")
            for err in d["errors"]:
                err = cast(ConversationError, err)
                print(f"  Code: {err.code} - {err.message}")

        # ---- Iterate results and validate redaction --------------------------
        for actions_page in paged_actions:
            for action_result in actions_page.task_results or []:
                ar = cast(AnalyzeConversationOperationResult, action_result)
                print(f"\nAction Name: {getattr(ar, 'name', None)}")

                if isinstance(ar, ConversationPiiOperationResult):
                    for conversation in ar.results.conversations or []:
                        conversation = cast(ConversationalPiiResult, conversation)
                        print(f"Conversation: #{conversation.id}")

                        for item in conversation.conversation_items or []:
                            item = cast(ConversationPiiItemResult, item)
                            redacted_text = (getattr(item.redacted_content, "text", None) or "").strip()
                            print(f"Redacted Text: {redacted_text}")

                            # Only verify when there are detected entities in the original item
                            if item.entities and redacted_text:
                                for entity in item.entities:
                                    entity = cast(NamedEntity, entity)
                                    original_text = entity.text or ""
                                    # 1) original PII should not be present
                                    assert (
                                        original_text not in redacted_text
                                    ), f"Expected entity '{original_text}' to be redacted but found in: {redacted_text}"

                                    # 2) redaction should show an entity mask like [Person] or [Person-1]
                                    expected_mask_pattern = rf"\[{re.escape(entity.category)}-?\d*\]"
                                    assert re.search(expected_mask_pattern, redacted_text, flags=re.IGNORECASE), (
                                        f"Expected redacted text to contain an entity mask similar to "
                                        f"'[{entity.category}]' but got: {redacted_text}"
                                    )

                                redacted_verified.append(redacted_text)

        # ---- Assertions -------------------------------------------------------
        assert (d.get("status") or "").lower() in {"succeeded", "partiallysucceeded"}
        assert len(redacted_verified) > 0, "Expected at least one redacted line to be verified."