File: issue_21.py

package info (click to toggle)
python-pyahocorasick 1.4.1-2
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 748 kB
  • sloc: ansic: 4,554; python: 2,823; sh: 312; makefile: 242
file content (54 lines) | stat: -rw-r--r-- 1,369 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
"""
    Aho-Corasick string search algorithm.

    Author    : Wojciech Muła, wojciech_mula@poczta.onet.pl
    WWW       : http://0x80.pl
    License   : public domain
"""

import pyahocorasick

# Every case carries the words loaded into the trie, the text that is
# scanned, and the expected (word, index of its last character) pairs.
test_cases = [
    # example provided by @Ulitochka
    dict(
        words=["alpha", "alpha beta", "gamma", "gamma alpha"],
        input="I went to alpha beta the alpha other day gamma alpha to pick up some spam",
        expected=[("alpha beta", 19), ("alpha", 29), ("gamma alpha", 51)],
    ),

    # the last keyword is followed by more text
    dict(
        words=["alpha", "alpha beta", "beta gamma", "gamma"],
        input="Cats have not idea what alpha beta gamma means",
        expected=[("alpha beta", 33), ("gamma", 39)],
    ),

    # same keywords, but the input ends right after the last keyword
    dict(
        words=["alpha", "alpha beta", "beta gamma", "gamma"],
        input="Cats have not idea what alpha beta gamma",
        expected=[("alpha beta", 33), ("gamma", 39)],
    ),
]


def test(case):
    """Run a single ``iter_long`` test case and fail on a mismatch.

    A ``pyahocorasick.Trie`` is filled with ``case['words']`` (every word is
    stored with itself as the associated value), converted into an automaton,
    and everything ``iter_long`` yields for ``case['input']`` is collected
    and compared with ``case['expected']``.

    Parameters:
        case: dict with the keys ``'words'``, ``'input'`` and ``'expected'``.

    Raises:
        AssertionError: if the collected results differ from
            ``case['expected']``. Both lists are printed before raising.
    """
    tree = pyahocorasick.Trie()
    for word in case['words']:
        tree.add_word(word, word)

    tree.make_automaton()

    actual = list(tree.iter_long(case['input']))
    expected = case['expected']

    if actual != expected:
        print("ERROR:")
        print(actual)
        print(expected)
        # Raise explicitly instead of `assert(False)`: assert statements are
        # removed under `python -O`, which would let a failing case pass.
        raise AssertionError(
            "iter_long returned %r, expected %r" % (actual, expected)
        )


if __name__ == '__main__':
    # Check the cases one by one, in the order they are listed; "OK" is
    # printed only after the whole loop has finished.
    for case in test_cases:
        test(case)

    print("OK")