File: joins.py

package info (click to toggle)
sql-editor 0.4
  • links: PTS
  • area: main
  • in suites: woody
  • size: 116 kB
  • ctags: 140
  • sloc: python: 1,091; makefile: 46; sh: 28
file content (92 lines) | stat: -rw-r--r-- 3,201 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92

__doc__=r"""
the dbGtk module can "automatically" recognize and display the joins,
using the "join rules"

these rules have the following format
't1/f1 t2/f2'
(apixes are inserted here, but should not be inserted in the rule!)
which means that the field f1 in table t1 will be joined to the field f2 in
table t2. In the following, 't1/f1' will be called 'input' and 't2/f2'
the 'output'

Additionally, the rules can use regular expressions and substitutions.
There is also a useful twist: the wildcard character % can be used
in the input (it will match any repetition of characters in the range
0-9a-zA-Z); then '\1' , '\2' ... can be used
in the output to replace the matching of successive '%'

So the following rule
    '%/%_%_id \3s/\3_id'
will join the field 'reference_user_id' in any table
to the field 'user_id' in table 'users'; while the rule
    'tbl%/id% tbl\2s/id'
will join the field 'idUser' in any table whose name starts by 'tbl',
to the field 'id' in table 'tblUsers'

For python experts: '%' is substituted by '([0-9a-zA-Z]*)' in the pattern;
the pattern 't1/f1' is used to replace into the real value of 't1' and 'f1',
and this produce the real value for 't2' and 'f2': the commands are
         REALt2 = re.sub(t1f1, t2, REALt1f1  )
        REALf2 = re.sub(t1f1, f2, REALt1f1  )
See section 4.2 in the python library reference,
or http://www.python.org/doc/current/lib/re-objects.html
"""

import re
import string

def preprocess_rules(rules):
    """returns a list of triples ( t1f1 , t2 , f2 ) where
    t1f1='t1/f1' is the rule for the ..
    """
    rl=[]
    for r in rules:
        #be kind to people, and reformat
        r=re.sub('\t+',' ',r)
        r=re.sub(' +',' ',r)
        #
        rs=string.split(r,' ')
        if len(rs)!=2:
            print 'ERROR: there must be only one space in join rule:'+repr(r)
        else:
            x=string.split(rs[1], '/')
            y=string.split(rs[0], '/')
            if len(x)!=2 or len(y) != 2:
                print 'ERROR: there must be one / in each side of join rule:'+repr(r)
            else:
                (b, c)=x
                a=re.sub('%', r'([0-9a-zA-Z]*)', rs[0])
                rl.append( (a,b,c)  )
    return rl

def find_join(tablein, fieldin, rules,
              #this is a dictionary that lists the fields in the tables
              table_fields):
    procrules=preprocess_rules(rules)
    REALt1f1=tablein +'/'+ fieldin
    for r in procrules:
        ( t1f1 , t2 , f2 )=r
        REALt2 = re.sub(t1f1, t2, REALt1f1  )
        REALf2 = re.sub(t1f1, f2, REALt1f1  )
        #print "REALt2 "+repr(REALt2)+"  REALf2"+ repr(REALf2)
        if REALt2 in table_fields.keys():
            if REALf2 in table_fields[REALt2]:
                return (REALt2, REALf2)
    return None





def test():
    #note the RAW attribute to the rules
    rules=[r'%/%_%_id \3s/\3_id'   ,      r'tbl%/id% tbl\2s/id']
    table_fields={'users' : [ 'user_id','lastname' ],
                  'tblUsers' : ['id', 'firstname'  ] }
    print find_join('anytable','reference_user_id', rules, table_fields )
    print find_join('tblAny','idUser', rules, table_fields )

            
if __name__ == '__main__':
    test()