File: relalg.py

package info (click to toggle)
python-kjbuckets 2.2-2
  • links: PTS
  • area: main
  • in suites: slink
  • size: 264 kB
  • ctags: 266
  • sloc: ansic: 2,581; python: 363; makefile: 53
file content (162 lines) | stat: -rw-r--r-- 5,232 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# A simple implementation of the relational algebra
#  using kjbuckets
from kjbuckets import *

def relFromDictSet(schemeseq, dictSet):
    result = relation(schemeseq, [] )
    result.rowset = dictSet
    return result

class relation:

  def __init__(self, schemeseq, listofrows):
      self.schemeseq = schemeseq
      self.scheme = kjSet(schemeseq)
      rowset = kjSet()
      for row in listofrows:
          rowset.add(kjUndump(schemeseq, row))
      self.rowset = rowset

  def pprint(self):
      print self.schemeseq
      print "============"
      for row in self.rowset.items():
          print row.dump(self.schemeseq)

  def addDicts(self, dictseq): # not used...
      for dict in dictseq:
          self.rowset.add(dict)

  def checkUnionCompatible(self,other):
      if self.scheme != other.scheme:
         raise ValueError, "operands not union compatible"

  # relational union
  def __add__(self, other):
      self.checkUnionCompatible(other)
      return relFromDictSet(self.schemeseq, self.rowset + other.rowset)

  # relational difference
  def __sub__(self, other):
      self.checkUnionCompatible(other)
      return relFromDictSet(self.schemeseq, self.rowset - other.rowset)

  # natural join (hash based algorithm)
  def __mul__(self,other):
      commonatts = self.scheme & other.scheme
      resultset = kjSet()
      if commonatts: # do a hash based join
         dumper = tuple(commonatts.items())
         selfgraph = kjGraph() # hash index for self
         othergraph = kjGraph() # hash index for other
         for row in self.rowset.items():
             selfgraph[row] = row.dump(dumper)
         for row in other.rowset.items():
             othergraph[row.dump(dumper)] = row
         for (selfrow, otherrow) in (selfgraph * othergraph).items():
             resultset.add(selfrow + otherrow)
      else: # no common attributes: do a cross product
         otherrows = other.rowset.items()
         for selfrow in self.rowset.items():
             for otherrow in otherrows:
                 resultset.add(selfrow + otherrow)
      return relFromDictSet( tuple((self.scheme + other.scheme).items()),
                             resultset )

# selection using a att->value pairs (as conjunction)
def vSel(pairs, rel):
    selected = kjSet()
    selector = kjDict(pairs)
    if selector.Clean()!=None:
        for row in rel.rowset.items():
            if (row + selector).Clean() != None:
               selected.add(row)
    return relFromDictSet(rel.schemeseq, selected)

# selection using att = att pairs (as conjunction)
def eqSelect(pairs, rel):
    selected = kjSet()
    selector = kjGraph(pairs)
    selector = (selector + ~selector).tclosure() # sym, trans closure
    for row in rel.rowset.items():
        if row.remap(selector) != None:
           selected.add(row)
    return relFromDictSet(rel.schemeseq, selected)

# projection on attribute sequence (as conjunction)
def proj(atts, rel):
    attset = kjSet(atts)
    resultset = kjSet()
    for row in rel.rowset.items():
        resultset.add(attset * row)
    return relFromDictSet(atts, resultset)

# renaming using (new,old) pair sequence
def rename(pairs, rel):
    renames = kjDict(pairs)
    untouched = rel.scheme - kjSet(renames.values())
    mapper = renames + untouched
    resultset = kjSet()
    for row in rel.rowset.items():
        resultset.add(mapper * row)
    return relFromDictSet(tuple(mapper.keys()), resultset)

#=========== end of simple.py
#
#Now let me show you the "simple" module in use.  First we need some relations.
#I'll steal C.J.Date's canonical/soporific supplier/parts database:
#
## database of suppliers, parts and shipments
##  from Date, page 79 (2nd ed) or page 92 (3rd ed) */
def test():
   #suppliers
   S = relation(
      ('snum', 'sname', 'status', 'city'),
      [ (1,   'Smith', 20,     'London'),
        (2,   'Jones', 10,     'Paris'),
        (3,   'Blake', 30,     'Paris'),
        (4,   'Clark', 20,     'London'),
        (5,   'Adams', 30,     'Athens')
      ])
   #parts
   P = relation(
      ('pnum', 'pname', 'color', 'weight', 'pcity'),
      [ (1,   'Nut',   'Red',   12,     'London'),
        (2,   'Bolt',  'Green', 17,     'Paris' ),
        (3,   'Screw', 'Blue',  17,     'Rome'  ),
        (4,   'Screw', 'Red',   14,     'London'),
        (5,   'Cam',   'Blue',  12,     'Paris'),
        (6,   'Cog',   'Red',   19,     'London')
      ])
   # shipments
   SP = relation(
      ('snum', 'pnum', 'qty',),
      [ (1,   1,   300),
        (1,   2,   200),
        (1,   3,   400),
        (1,   4,   200),
        (1,   5,   100),
        (1,   6,   100),
        (2,   1,   300),
        (2,   2,   400),
        (3,   2,   200),
        (4,   2,   200),
        (4,   4,   300),
        (4,   5,   400)
      ])
   
   # names and cities of suppliers
   proj(("sname","city"),S).pprint()
   
   # part names of parts supplied by Blake
   proj(("pname",),vSel( ( ("sname","Blake"), ), S*SP*P)).pprint()

   
    # supplier names and numbers where the supplier doesn't supply screws
   ( proj( ("sname","snum"), S) -
      proj( ("sname","snum"),
            vSel( ( ("pname", "Screw"), ), P*SP*S )
    ) ).pprint()
   

if __name__=="__main__": test()