File: 02-constraint.py

package info (click to toggle)
python-pattern 2.6%2Bgit20150109-3
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 78,672 kB
  • sloc: python: 53,865; xml: 11,965; ansic: 2,318; makefile: 94
file content (73 lines) | stat: -rw-r--r-- 2,475 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import os, sys; sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))

from pattern.search import search, Pattern, Constraint
from pattern.en     import parsetree

# What we call a "search word" in example 01-search.py
# is actually called a constraint, because it can contain different options.
# Options are separated by "|".
# The next search pattern retrieves words that are a noun OR an adjective:
s = parsetree("big white rabbit")
print search("NN|JJ", s)
print

# This pattern yields phrases containing an adjective followed by a noun.
# Consecutive constraints are separated by a space:
print search("JJ NN", s)
print

# Or a noun preceded by any number of adjectives:
print search("JJ?+ NN", s)
print

# Note: NN marks singular nouns, NNS marks plural nouns.
# If you want to include both, use "NN*" as a constraint.
# This works for NN*, VB*, JJ*, RB*.

s = parsetree("When I sleep the big white rabbit will stare at my feet.")
m = search("rabbit stare at feet", s)
print s
print m
print
# Why does this work? 
# The word "will" is included in the result, even if the pattern does not define it.
# The pattern should break when it does not encounter "stare" after "rabbit."
# It works because "will stare" is one verb chunk.
# The "stare" constraint matches the head word of the chunk ("stare"),
# so "will stare" is considered an overspecified version of "stare".
# The same happens with "my feet" and the "rabbit" constraint,
# which matches the overspecified chunk "the big white rabbit".

p = Pattern.fromstring("rabbit stare at feet", s)
p.strict = True # Now it matches only what the pattern explicitly defines (=no match).
m = p.search(s)
print m
print

# Sentence chunks can be matched by tag (e.g. NP, VP, ADJP).
# The pattern below matches anything from
# "the rabbit gnaws at your fingers" to
# "the white rabbit looks at the carrots":
p = Pattern.fromstring("rabbit VP at NP", s)
m = p.search(s)
print m
print

if m:
    for w in m[0].words:
        print w, " \t=>", m[0].constraint(w)

print
print "-------------------------------------------------------------"
# Finally, constraints can also include regular expressions.
# To include them we need to use the full syntax instead of the search() function:
import re
r = re.compile(r"[0-9|\.]+") # all numbers
p = Pattern()
p.sequence.append(Constraint(words=[r]))
p.sequence.append(Constraint(tags=["NN*"]))

s = Sentence(parse("I have 9.5 fingers."))
print s
print p.search(s)
print