File: test_language_detect.py

package info (click to toggle)
dateparser 1.2.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 4,140 kB
  • sloc: python: 52,721; makefile: 155; sh: 15
file content (118 lines) | stat: -rw-r--r-- 3,849 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import pytest

# Skip this whole module when the optional ``fasttext`` package is missing.
# NOTE(review): placed ahead of the dateparser imports, presumably because
# dateparser.custom_language_detection.fasttext imports fasttext at load time.
pytest.importorskip("fasttext")

import unittest
from datetime import datetime
from unittest.mock import Mock

from parameterized import param, parameterized

from dateparser import parse
from dateparser.custom_language_detection.fasttext import (
    detect_languages as fast_text_detect_languages,
)
from dateparser.custom_language_detection.langdetect import (
    detect_languages as lang_detect_detect_languages,
)
from dateparser.date import DateDataParser
from dateparser.search import search_dates

# Stand-in language detector: whatever it is called with, it returns ["en"].
detect_languages = Mock(return_value=["en"])


class CustomLangDetectParserTest(unittest.TestCase):
    """Exercise dateparser's pluggable language-detection hooks.

    The first group of tests calls the fastText- and langdetect-backed
    ``detect_languages`` wrappers directly.  The second group passes the
    module-level ``detect_languages`` mock as ``detect_languages_function``
    to ``parse``, ``DateDataParser`` and ``search_dates`` and compares the
    parsed result with the expected value.
    """

    def check_is_returned_list(self):
        """Assert that the detection result stored on ``self.result`` is a list."""
        # assertIsInstance reports the offending value on failure and is the
        # idiomatic replacement for comparing ``type(...)`` objects.
        self.assertIsInstance(self.result, list)

    @parameterized.expand(
        [
            param(dt_string="14 June 2020", confidence_threshold=0.0),
            param(dt_string="26 July 2021", confidence_threshold=0.0),
        ]
    )
    def test_custom_language_detect_fast_text(self, dt_string, confidence_threshold):
        """The fastText wrapper returns a list for ordinary date strings."""
        self.result = fast_text_detect_languages(dt_string, confidence_threshold)
        self.check_is_returned_list()

    @parameterized.expand(
        [
            param(dt_string="14 June 2020", confidence_threshold=0.0),
        ]
    )
    def test_custom_language_detect_lang_detect(self, dt_string, confidence_threshold):
        """The langdetect wrapper returns a list for an ordinary date string."""
        self.result = lang_detect_detect_languages(dt_string, confidence_threshold)
        self.check_is_returned_list()

    @parameterized.expand(
        [
            param(dt_string="10-10-2021", confidence_threshold=0.5),
        ]
    )
    def test_lang_detect_doesnt_raise_error(self, dt_string, confidence_threshold):
        """A digits-only string yields an empty list instead of an exception."""
        result = lang_detect_detect_languages(dt_string, confidence_threshold)
        self.assertEqual(result, [])

    # Mock tests for parse, search_dates and DateDataParser.
    #
    # The helpers below use the module-level ``detect_languages`` mock: names
    # bound in a class body are not visible from inside its methods, so a
    # class-level copy of the mock would never be consulted.

    # parse

    def when_date_is_parsed_using_parse(self, dt_string):
        """Parse ``dt_string`` with ``parse`` and keep the outcome in ``self.result``."""
        self.result = parse(dt_string, detect_languages_function=detect_languages)

    def then_date_obj_exactly_is(self, expected_date_obj):
        """Assert that ``self.result`` equals ``expected_date_obj``."""
        self.assertEqual(expected_date_obj, self.result)

    @parameterized.expand(
        [
            param("Tuesday Jul 22, 2014", datetime(2014, 7, 22, 0, 0, 0)),
        ]
    )
    def test_custom_language_detect_mock_parse(self, dt_string, expected_date_obj):
        """``parse`` accepts a custom detector and returns the expected datetime."""
        self.when_date_is_parsed_using_parse(dt_string)
        self.then_date_obj_exactly_is(expected_date_obj)

    # DateDataParser

    def when_date_is_parsed_using_with_datedataparser(self, dt_string):
        """Parse ``dt_string`` with ``DateDataParser`` and keep its ``date_obj``."""
        ddp = DateDataParser(detect_languages_function=detect_languages)
        self.result = ddp.get_date_data(dt_string)["date_obj"]

    @parameterized.expand(
        [
            param("Tuesday Jul 22, 2014", datetime(2014, 7, 22, 0, 0, 0)),
        ]
    )
    def test_custom_language_detect_mock_datedataparser(
        self, dt_string, expected_date_obj
    ):
        """``DateDataParser`` accepts a custom detector and parses the date."""
        self.when_date_is_parsed_using_with_datedataparser(dt_string)
        self.then_date_obj_exactly_is(expected_date_obj)

    # search_dates

    def when_date_is_parsed_using_with_search_dates(self, dt_string):
        """Run ``search_dates`` on ``dt_string`` and keep the outcome in ``self.result``."""
        self.result = search_dates(
            dt_string, detect_languages_function=detect_languages
        )

    @parameterized.expand(
        [
            param(
                "January 3, 2017 - February 1st",
                [
                    ("January 3, 2017", datetime(2017, 1, 3, 0, 0)),
                    ("February 1st", datetime(2017, 2, 1, 0, 0)),
                ],
            ),
        ]
    )
    def test_custom_language_detect_mock_search_dates(
        self, dt_string, expected_date_obj
    ):
        """``search_dates`` accepts a custom detector and finds both dates."""
        self.when_date_is_parsed_using_with_search_dates(dt_string)
        self.then_date_obj_exactly_is(expected_date_obj)