File: inflect.py

package info (click to toggle)
python-pattern 2.6%2Bgit20180818-2
  • links: PTS
  • area: main
  • in suites: bullseye
  • size: 93,888 kB
  • sloc: python: 28,119; xml: 15,085; makefile: 194
file content (147 lines) | stat: -rw-r--r-- 4,246 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#### PATTERN | XX | INFLECT ########################################################################
# -*- coding: utf-8 -*-
# Copyright (c)
# Author:
# License:
# http://www.clips.ua.ac.be/pages/pattern

####################################################################################################
# Template for pattern.xx.inflect with functions for word inflection in language XXXXX.
# Inflection is the modification of a word to express different grammatical categories,
# such as tense, mood, voice, aspect, person, number, gender and case.
# Conjugation is the inflection of verbs.
# To construct a lemmatizer for pattern.xx.parser.find_lemmata(),
# we need functions for noun singularization, verb infinitives, predicate adjectives, etc.

from __future__ import unicode_literals
from __future__ import division

from builtins import str, bytes, dict, int
from builtins import map, zip, filter
from builtins import object, range

import os
import sys
import re

try:
    # Absolute directory that contains this module file.
    MODULE = os.path.dirname(os.path.realpath(__file__))
except Exception:
    # Best-effort fallback (e.g. when `__file__` is not defined): use an empty prefix so that
    # paths built from MODULE become relative paths.
    # Exception is caught instead of using a bare "except:" so that KeyboardInterrupt and
    # SystemExit are not silently swallowed.
    MODULE = ""

sys.path.insert(0, os.path.join(MODULE, "..", "..", "..", ".."))

# Import Verbs base class and verb tenses.
from pattern.text import Verbs as _Verbs
from pattern.text import (
    INFINITIVE, PRESENT, PAST, FUTURE,
    FIRST, SECOND, THIRD,
    SINGULAR, PLURAL, SG, PL,
    PROGRESSIVE,
    PARTICIPLE
)

sys.path.pop(0)

# Part-of-speech tags.
VERB = "VB"
NOUN = "NN"
ADJECTIVE = "JJ"
ADVERB = "RB"

# Lowercase vowel characters, and a case-insensitive pattern that matches any one of them.
VOWELS = "aeiouy"
re_vowel = re.compile(r"a|e|i|o|u|y", re.I)


def is_vowel(ch):
    """ Returns True if the given string occurs in VOWELS.
        The test is case-sensitive (unlike re_vowel, which is compiled with re.I).
    """
    return ch in VOWELS

#### ARTICLE #######################################################################################

# Inflection gender, available under three naming schemes that all share the same values.
# Note: PLURAL and PL rebind the names imported from pattern.text earlier in this file.
M, F, N, PL = "m", "f", "n", "p"
MALE, FEMALE, NEUTRAL, PLURAL = M, F, N, PL
MASCULINE, FEMININE, NEUTER = M, F, N


def definite_article(word):
    """ Returns the definite article to use with the given word.
        This template implementation ignores the word and always returns "the".
    """
    return "the"


def indefinite_article(word):
    """ Returns the indefinite article to use with the given word.
        This template implementation ignores the word and always returns "a".
    """
    return "a"

# Selectors for the kind of article to produce.
DEFINITE = "definite"
INDEFINITE = "indefinite"


def article(word, function=INDEFINITE):
    """ Returns the indefinite or definite article for the given word.
        If function equals DEFINITE, the result of definite_article(word) is returned;
        any other value yields indefinite_article(word).
    """
    # A conditional expression is used instead of "cond and a or b":
    # the and/or idiom falls through to the indefinite article whenever
    # definite_article() returns a falsy value such as "".
    return definite_article(word) if function == DEFINITE else indefinite_article(word)

# Private alias for article(): referenced() has a parameter that is also named "article",
# which shadows the function inside its body.
_article = article


def referenced(word, article=INDEFINITE):
    """ Returns the word preceded by its article, separated by a single space.
        The article parameter (INDEFINITE by default) selects which article is looked up.
    """
    prefix = _article(word, article)
    return "%s %s" % (prefix, word)

#### PLURALIZE ######################################################################################


def pluralize(word, pos=NOUN, custom=None):
    """ Returns the plural of a given word.
        - word  : the singular word form.
        - pos   : part-of-speech tag of the word (not used by this template rule).
        - custom: optional dictionary of user-defined replacements (singular => plural),
                  consulted before the default rule is applied.
    """
    # None replaces the former mutable {} default; passing no dictionary behaves the same.
    if custom is not None and word in custom:
        return custom[word]
    return word + "s"

#### SINGULARIZE ###################################################################################


def singularize(word, pos=NOUN, custom=None):
    """ Returns the singular of a given word.
        - word  : the plural word form.
        - pos   : part-of-speech tag of the word (not used by this template rule).
        - custom: optional dictionary of user-defined replacements (plural => singular),
                  consulted before the default rule is applied.
    """
    # None replaces the former mutable {} default; passing no dictionary behaves the same.
    if custom is not None and word in custom:
        return custom[word]
    # Remove exactly one trailing "s", mirroring pluralize().
    # str.rstrip("s") would strip every trailing "s" ("boss" => "bo", "class" => "cla").
    if word.endswith("s"):
        return word[:-1]
    return word

#### VERB CONJUGATION ##############################################################################
# The verb table was trained on CELEX and contains the top 2000 most frequent verbs.


class Verbs(_Verbs):
    """ Verb conjugation for language "xx", backed by the file xx-verbs.txt in this module's directory.
    """

    def __init__(self):
        # Data file with the verb forms, located in the same directory as this module.
        table_path = os.path.join(MODULE, "xx-verbs.txt")
        # The order of tenses in the given file; see pattern.text.__init__.py => Verbs.
        tense_order = [0, 1, 2, 3, 7, 8, 17, 18, 19, 23, 25, 24, 16, 9, 10, 11, 15, 33, 26, 27, 28, 32]
        _Verbs.__init__(self, table_path, language="xx", format=tense_order, default={})

    def find_lemma(self, verb):
        """ Returns the base form of the given inflected verb, using a rule-based approach.
            This template has no rules yet: the verb is returned unchanged.
        """
        return verb

    def find_lexeme(self, verb):
        """ For a regular verb (base form), returns the forms using a rule-based approach.
            This template has no rules yet: an empty list is returned.
        """
        return []

# Module-level Verbs instance, with its bound methods/attributes exposed as module-level names.
verbs = Verbs()

conjugate = verbs.conjugate
lemma = verbs.lemma
lexeme = verbs.lexeme
tenses = verbs.tenses

#### ATTRIBUTIVE & PREDICATIVE #####################################################################


def attributive(adjective):
    """ Returns the attributive form of the given predicative adjective.
        This template implementation returns the adjective unchanged.
    """
    return adjective


def predicative(adjective):
    """ Returns the predicative form of the given adjective.
        This template implementation returns the adjective unchanged.
    """
    return adjective