File: test_break_sentence.py

package info (click to toggle)
python-azure 20250603%2Bgit-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 851,724 kB
  • sloc: python: 7,362,925; ansic: 804; javascript: 287; makefile: 195; sh: 145; xml: 109
file content (77 lines) | stat: -rw-r--r-- 3,973 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# ------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------

from devtools_testutils import recorded_by_proxy
from preparer import TextTranslationPreparer
from testcase import TextTranslationTest


class TestBreakSentence(TextTranslationTest):
    """Tests for the client's ``find_sentence_boundaries`` operation.

    Every test is wrapped by ``TextTranslationPreparer`` and
    ``recorded_by_proxy`` and reads its connection settings from the keyword
    arguments it receives.
    """

    def _client_from_kwargs(self, settings):
        """Create a client from the connection settings in *settings*.

        The endpoint, API key and region are looked up with ``dict.get``, so a
        missing entry is forwarded to ``create_client`` as ``None``.
        """
        return self.create_client(
            settings.get("text_translation_endpoint"),
            settings.get("text_translation_apikey"),
            settings.get("text_translation_region"),
        )

    @TextTranslationPreparer()
    @recorded_by_proxy
    def test_autodetect(self, **kwargs):
        """Break one English sentence without passing a language hint."""
        translator = self._client_from_kwargs(kwargs)

        result = translator.find_sentence_boundaries(body=["Hello world"])

        assert result is not None
        first_item = result[0]
        assert first_item.detected_language.language == "en"
        # Created bug: https://machinetranslation.visualstudio.com/MachineTranslation/_workitems/edit/164493
        assert first_item.detected_language.score > 0.8
        assert first_item.sent_len[0] == 11

    @TextTranslationPreparer()
    @recorded_by_proxy
    def test_with_language(self, **kwargs):
        """Break a Thai text into sentences with the language given explicitly."""
        translator = self._client_from_kwargs(kwargs)
        thai_text = "รวบรวมแผ่นคำตอบ ระยะเวลาของโครงการ วิธีเลือกชายในฝัน หมายเลขซีเรียลของระเบียน วันที่สิ้นสุดของโครงการเมื่อเสร็จสมบูรณ์ ปีที่มีการรวบรวม ทุกคนมีวัฒนธรรมและวิธีคิดเหมือนกัน ได้รับโทษจำคุกตลอดชีวิตใน ฉันลดได้ถึง 55 ปอนด์ได้อย่างไร  ฉันคิดว่าใครๆ ก็ต้องการกำหนดเมนูอาหารส่วนบุคคล"

        result = translator.find_sentence_boundaries(body=[thai_text], language="th")

        assert result is not None
        # Only the first four reported sentence lengths are checked.
        for position, wanted in enumerate((78, 41, 110, 46)):
            assert wanted == result[0].sent_len[position]

    @TextTranslationPreparer()
    @recorded_by_proxy
    def test_with_language_script(self, **kwargs):
        """Break a Latin-script ``zh-Hans`` sentence with language and script set."""
        translator = self._client_from_kwargs(kwargs)

        result = translator.find_sentence_boundaries(
            body=["zhè shì gè cè shì。"],
            language="zh-Hans",
            script="Latn",
        )

        assert result is not None
        assert result[0].sent_len[0] == 18

    @TextTranslationPreparer()
    @recorded_by_proxy
    def test_with_multiple_languages(self, **kwargs):
        """Break an English and an Arabic text sent together in one request."""
        translator = self._client_from_kwargs(kwargs)
        texts = ["hello world", "العالم هو مكان مثير جدا للاهتمام"]

        result = translator.find_sentence_boundaries(body=texts)

        assert result is not None
        english_item = result[0]
        arabic_item = result[1]
        assert english_item.detected_language.language == "en"
        assert arabic_item.detected_language.language == "ar"
        assert english_item.sent_len[0] == 11
        assert arabic_item.sent_len[0] == 32