File: __init__.py

package info (click to toggle)
python-pattern 2.6%2Bgit20180818-2
  • links: PTS
  • area: main
  • in suites: bullseye
  • size: 93,888 kB
  • sloc: python: 28,119; xml: 15,085; makefile: 194
file content (78 lines) | stat: -rw-r--r-- 1,939 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#### PATTERN | VECTOR | WORDLIST ###################################################################
# Copyright (c) 2010 University of Antwerp, Belgium
# Author: Tom De Smedt <tom@organisms.be>
# License: BSD (see LICENSE.txt for details).
# http://www.clips.ua.ac.be/pages/pattern

####################################################################################################

from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division

from builtins import str, bytes, dict, int
from builtins import map, zip, filter
from builtins import object, range

import os
from io import open

# Directory that contains this module; word files are looked up relative to it.
try:
    MODULE = os.path.dirname(os.path.realpath(__file__))
except Exception:
    # Best-effort fallback (e.g., __file__ is not defined): use relative paths.
    # Only Exception is caught so that SystemExit and KeyboardInterrupt
    # are not silently swallowed during import.
    MODULE = ""


class Wordlist(object):
    """ Lazy list of words backed by the file "<name>.txt" in the MODULE directory.
        The file is only read the first time the list is accessed while it holds no words.
        Words passed as data are used as-is, in which case the file is not read.
        The list can be modified in-place (item assignment, insert(), append(), extend());
        such changes are kept in memory only.
    """

    def __init__(self, name, data=None):
        """ Creates a new word list.
            - name: base name of the word file (without ".txt"),
                    or None for a list that only lives in memory.
            - data: optional list of initial words. It is stored without copying,
                    so in-place modifications are visible to the caller.
        """
        self._name = name
        # Use a fresh list per instance: a mutable default argument
        # would be shared by every word list created without data.
        self._data = [] if data is None else data

    def _load(self):
        """ Reads the words from file (split on "\\n") if the list holds no words yet.
            Does nothing for a list without a file name, e.g., the result of Wordlist + iterable.
        """
        if self._data or self._name is None:
            return
        path = os.path.join(MODULE, self._name + ".txt")
        # NOTE(review): no encoding is passed, so the platform default is used;
        # confirm the encoding of the word file and consider passing it explicitly.
        with open(path) as f:
            # The context manager closes the file handle once it has been read.
            self._data = f.read().split("\n")

    def __repr__(self):
        """ Returns the representation of the underlying list of words.
        """
        self._load()
        return repr(self._data)

    def __iter__(self):
        """ Returns an iterator over the words.
        """
        self._load()
        return iter(self._data)

    def __len__(self):
        """ Returns the number of words.
        """
        self._load()
        return len(self._data)

    def __contains__(self, w):
        """ Returns True if the given word is in the list.
        """
        self._load()
        return w in self._data

    def __add__(self, iterable):
        """ Returns a new, sorted Wordlist (without a file name)
            with the words in this list and the items in the given iterable.
        """
        self._load()
        return Wordlist(None, data=sorted(self._data + list(iterable)))

    def __getitem__(self, i):
        """ Returns the word (or slice of words) at the given index.
        """
        self._load()
        return self._data[i]

    def __setitem__(self, i, v):
        """ Replaces the word at the given index.
        """
        self._load()
        self._data[i] = v

    def insert(self, i, v):
        """ Inserts the given word before the given index.
        """
        self._load()
        self._data.insert(i, v)

    def append(self, v):
        """ Adds the given word to the end of the list.
        """
        self._load()
        self._data.append(v)

    def extend(self, v):
        """ Adds all words in the given iterable to the end of the list.
        """
        self._load()
        self._data.extend(v)

# Russian stop words, read from "stopwords.txt" in the MODULE directory on first access.
STOPWORDS = Wordlist(name="stopwords")