File: ParseTree.py

package info (click to toggle)
zope-textindexng2 1%3A2.2.0-5
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 2,888 kB
  • ctags: 1,598
  • sloc: ansic: 6,836; python: 6,596; xml: 185; makefile: 137; sh: 41
file content (147 lines) | stat: -rw-r--r-- 4,208 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
###########################################################################
#
# TextIndexNG                The next generation TextIndex for Zope
#
# This software is governed by a license. See
# LICENSE.txt for the terms of this license.
#
###########################################################################

"""
ParseTree

$Id: ParseTree.py,v 1.23 2004/10/30 06:39:06 ajung Exp $
"""

from Products.TextIndexNG2.interfaces.IParseTreeNode import IParseTreeNode
from ResultSet import unionResultSets, intersectResultSets
from ResultSet import nearResultSets, phraseResultSets, inverseResultSet
import TXNGSplitter


class BaseNode:
    """ base class for all nodes

    A node wraps exactly one value (see getValue()) and reports the name
    of its concrete class as its type (see getType()). BaseNode itself is
    abstract: only its subclasses are meant to be instantiated.
    """

    __implements__ = IParseTreeNode

    def __init__(self, v):
        """ store ``v`` as the node value

        Raises NotImplementedError when BaseNode itself (rather than a
        subclass) is instantiated.
        """
        # Compare the class object by identity. An isinstance() test of
        # ``self.__class__`` against BaseNode asks whether the *class* is a
        # BaseNode instance, which is never true, so it cannot guard anything.
        if self.__class__ is BaseNode:
            raise NotImplementedError("don't instantiate BaseNode")
        self._value = v

    def getType(self):
        """ return the name of the concrete node class """
        return self.__class__.__name__

    def getValue(self):
        """ return the value passed to the constructor """
        return self._value

    def __cmp__(self, node):
        """ return 0 if ``node`` has the same type and value, else -1

        Only the equal / not-equal distinction is meaningful; unequal
        nodes always compare as -1, so no real ordering is defined.
        """
        same_type = self.getType() == node.getType()
        if same_type and self.getValue() == node.getValue():
            return 0
        return -1

    def __repr__(self):
        return "%s(%r)" % (self.getType(), self.getValue())


class WordNode(BaseNode):
    """ normal word

    The Evaluator normalizes the value with its splitter and passes the
    result to the index' lookupWord().
    """

class GlobNode(BaseNode):
    """ globbing

    The Evaluator passes the value unchanged to the index'
    lookupWordsByPattern().
    """

class TruncNode(BaseNode):
    """ right truncation

    The Evaluator passes the value to the index'
    lookupWordsByTruncation() with right=1.
    """

class SubstringNode(BaseNode):
    """ substring

    The Evaluator passes the value to the index'
    lookupWordsBySubstring().
    """

class LTruncNode(BaseNode):
    """ left truncation

    The Evaluator passes the value to the index'
    lookupWordsByTruncation() with left=1.
    """

class SimNode(BaseNode):
    """ similarity

    The Evaluator passes the value to the index'
    lookupWordsBySimilarity().
    """

class NotNode(BaseNode):
    """ NOT node

    The value is a single child node; the Evaluator evaluates it and
    inverts the result with inverseResultSet().
    """

class AndNode(BaseNode):
    """ AND node

    The value is a sequence of child nodes; the Evaluator evaluates each
    one and combines the results with intersectResultSets().
    """

class OrNode(BaseNode):
    """ OR node

    The value is a sequence of child nodes; the Evaluator evaluates each
    one and combines the results with unionResultSets().
    """
    
class NearNode(BaseNode):
    """ NEAR node

    The value is a sequence of child nodes; the Evaluator evaluates each
    one and combines the results with nearResultSets(), using the
    evaluator's near_distance as the distance.
    """

class PhraseNode(BaseNode):
    """ Phrase node

    The value is a sequence of child nodes; the Evaluator skips false
    entries, evaluates the rest and combines the results with
    phraseResultSets().
    """

class RangeNode(BaseNode):
    """ Range node

    The value is indexable; the Evaluator passes its items 0 and 1 to the
    index' lookupRange().
    """


class Evaluator:
    """ evaluator for a ParseTree instance

    Calling the evaluator with a node walks the tree recursively and
    returns whatever the matching index lookup or ResultSet helper
    returns for that node type.
    """

    # NOTE(review): ``near_distance`` is read when a NearNode is evaluated
    # but is never assigned in __init__ -- presumably the caller sets it on
    # the instance before evaluating; confirm against the calling code.

    def __init__(self, index):
        """ bind the evaluator to ``index``

        A splitter is created from the index' ``splitter_separators``
        with case folding switched off; it is used by normalize_word().
        """
        self._index = index
        self._splitter = TXNGSplitter.TXNGSplitter(
            casefolding=0,
            separator=index.splitter_separators)

    def normalize_word(self, word):
        """ normalize a word according to the splitter separators

        Returns the first item the splitter produces for ``word``, or
        ``word`` itself when the splitter produces nothing.
        """

        # There seems to be a bug in the splitter where single characters
        # are returned as empty string

        # Split once and reuse the result instead of splitting a second
        # time just to fetch the first item.
        parts = self._splitter.split(word)
        if parts:
            return parts[0]
        return word

    def _evaluate_children(self, node):
        """ evaluate every child node stored as the value of ``node`` """
        return [self(child) for child in node.getValue()]

    def __call__(self, node):
        """ evaluate ``node`` (recursively) against the index

        Raises ValueError (carrying the node) for unsupported node types.
        """

        if isinstance(node, WordNode):
            word = node.getValue()
            return self._index.lookupWord(self.normalize_word(word))

        elif isinstance(node, AndNode):
            return intersectResultSets(self._evaluate_children(node))

        elif isinstance(node, OrNode):
            return unionResultSets(self._evaluate_children(node))

        elif isinstance(node, GlobNode):
            return self._index.lookupWordsByPattern(node.getValue())

        elif isinstance(node, TruncNode):
            return self._index.lookupWordsByTruncation(node.getValue(), right=1)

        elif isinstance(node, LTruncNode):
            return self._index.lookupWordsByTruncation(node.getValue(), left=1)

        elif isinstance(node, SubstringNode):
            return self._index.lookupWordsBySubstring(node.getValue())

        elif isinstance(node, RangeNode):
            bounds = node.getValue()
            return self._index.lookupRange(bounds[0], bounds[1])

        elif isinstance(node, SimNode):
            return self._index.lookupWordsBySimilarity(node.getValue())

        elif isinstance(node, NotNode):
            rset = self(node.getValue())
            return inverseResultSet(rset, self._index)

        elif isinstance(node, NearNode):
            sets = self._evaluate_children(node)
            return nearResultSets(sets, self._index, distance=self.near_distance)

        elif isinstance(node, PhraseNode):
            # false entries in the phrase are skipped, not evaluated
            sets = [self(n) for n in node.getValue() if n]
            return phraseResultSets(sets, self._index)

        else:
            # call form of raise: same exception, valid on Python 2 and 3
            raise ValueError(node)