File: PositionMap.py

package info (click to toggle)
zope-textindexng2 1%3A2.2.0-5
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 2,888 kB
  • ctags: 1,598
  • sloc: ansic: 6,836; python: 6,596; xml: 185; makefile: 137; sh: 41
file content (104 lines) | stat: -rw-r--r-- 2,941 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
###########################################################################
#
# TextIndexNG                The next generation TextIndex for Zope
#
# This software is governed by a license. See
# LICENSE.txt for the terms of this license.
#
###########################################################################

""" 
functions for checking positions of words in documents 

$Id: PositionMap.py,v 1.12 2003/07/09 17:33:47 ajung Exp $
"""


class PositionMap:
    """ position map for word positions inside a document

        The map is an ordered list of (word, poslst) tuples. Every
        poslst must support len(), iteration and a range query
        keys(min, max); the checks below rely on exactly these three
        operations (the original code comments name IISet as the
        container type).
    """

    def __init__(self):
        # list of (word, poslst) tuples in the order they were appended
        self.map = []

    def append(self, word, poslst):
        """ add the position list 'poslst' for 'word' to the map """
        self.map.append( (word, poslst) )

    def __str__(self):
        """ return one '<tab>"word": positions' line per entry """
        return '\n'.join([ '\t"%s": %s' % (w,lst) for w,lst in self.map ])

    __repr__ = __str__


    def checkPositionMapBidirectional(self, near_distance):
        """ check if a PositionMap represents a valid match for
            a near search (bidirectional search)

            Returns the list of positions 'pos', taken from the shortest
            position list, for which every word of the map has at least
            one position inside [pos - near_distance, pos + near_distance].
            An empty map yields an empty list.
        """

        # a posMap is a list of tuples (word, IISet() ), where
        # the IISet is a list of positions of that word inside
        # one document

        if not self.map:
            return []

        # Pick the shortest position list as the set of candidates.
        # Compare against None explicitly: an empty position list is
        # falsy and must not be mistaken for "nothing chosen yet".
        min_poslst = None
        for word, poslst in self.map:
            if min_poslst is None or len(poslst) < len(min_poslst):
                min_poslst = poslst

        valid_positions = []

        for pos in min_poslst:

            # perform a range search over all position lists and stop
            # at the first word that has no position close enough

            for word, poslst in self.map:
                keys = poslst.keys( pos - near_distance,
                                    pos + near_distance)
                if len(keys) == 0:
                    break
            else:
                # no break: every word occurs near 'pos'
                valid_positions.append(pos)

        return valid_positions


    def checkPositionMapUnidirectional(self, near_distance):
        """ check if a PositionMap represents a valid match for
            a near search (unidirectional search)

            Returns the list of positions 'pos' of the first word for
            which the i-th word of the map (counting from 0) has at
            least one position inside
            [pos + i - near_distance, pos + i + near_distance].
            An empty map yields an empty list.
        """

        # a posMap is a list of tuples (word, IISet() ), where
        # the IISet is a list of positions of that word inside
        # one document

        if not self.map:
            return []

        valid_positions = []

        # candidates are always the positions of the first word
        for pos in self.map[0][1]:

            # perform a range search over all position lists; the
            # expected position is shifted by the index of the word

            i = 0
            for word, poslst in self.map:
                keys = poslst.keys( pos + i - near_distance,
                                    pos + i + near_distance)
                if len(keys) == 0:
                    break
                i += 1
            else:
                # no break: every word occurs near its expected offset
                valid_positions.append(pos)

        return valid_positions