File: 03-lemmata.py

package info (click to toggle)
python-pattern 2.6%2Bgit20180818-2
  • links: PTS
  • area: main
  • in suites: bullseye
  • size: 93,888 kB
  • sloc: python: 28,119; xml: 15,085; makefile: 194
file content (39 lines) | stat: -rw-r--r-- 1,257 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from __future__ import print_function
from __future__ import unicode_literals

from builtins import str, bytes, dict, int

import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))

from pattern.search import search, match
from pattern.en import parsetree

# This example demonstrates an interesting search pattern that mines for comparisons.
# Notice the use of the constraint "be".
# If the output from the parser includes word lemmas (e.g., "doing" => "do"),
# these will also be matched. Using "be" then matches "is", "being", "are", ...
# and, if underspecification is used, "could be", "will be", "definitely was", ...

# Search pattern for comparisons; the constraint "be" relies on the parser
# supplying word lemmas.
comparison = "NP be ADJP|ADVP than NP"

sentences = (
    "the turtle was faster than the hare",
    "Arnold Schwarzenegger is more dangerous than Dolph Lundgren",
)

for sentence in sentences:
    # lemmata=True makes the parser include word lemmas in its output.
    tree = parsetree(sentence, lemmata=True)
    found = search(comparison, tree)
    if not found:
        # Nothing to report for this sentence.
        continue
    # Report only the first match, restricted to the constituents for
    # constraint indices 0 (NP), 2 (ADJP|ADVP) and 4 (NP).
    first = found[0]
    print(first.constituents(constraint=[0, 2, 4]))
    print("")


# Second demonstration: take a single match and show, for every matched word,
# the constraint of the pattern that it was matched against.
p = "NP be ADJP|ADVP than NP"
t = parsetree("the turtle was faster than the hare", lemmata=True)
m = match(p, t)
print(t)
print("")
# match() gives back None when the pattern does not occur in the sentence,
# so check before reading m.words to avoid an AttributeError.
if m is not None:
    for w in m.words:
        print("%s\t=> %s" % (w, m.constraint(w)))