File: deductive_closure.plpgsql

package info (click to toggle)
libchado-perl 1.23-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 23,976 kB
  • ctags: 10,378
  • sloc: xml: 192,540; sql: 165,945; perl: 28,339; sh: 101; python: 73; makefile: 46
file content (262 lines) | stat: -rw-r--r-- 8,405 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
CREATE TYPE closure_result AS (cvterm_id INTEGER, pathdistance INTEGER);

-- results may not be distinct
CREATE OR REPLACE FUNCTION _closure_over_IS_A(INTEGER, INTEGER) RETURNS SETOF INTEGER AS
$$
DECLARE
    term        ALIAS FOR $1;
    is_a_id     ALIAS FOR $2;
    inner_row               RECORD;
    outer_row               RECORD;
BEGIN
    -- get direct is_a parents
    FOR inner_row IN SELECT object_id AS cvterm_id 
                 FROM cvterm_relationship WHERE subject_id = term AND type_id=is_a_id LOOP
        -- return direct is_a parents
        RETURN NEXT inner_row.cvterm_id;

        -- recursive, self-call
        FOR outer_row IN SELECT * FROM _closure_over_IS_A(inner_row.cvterm_id, is_a_id) LOOP
            RETURN NEXT outer_row._closure_over_IS_A;
        END LOOP;
    END LOOP;
    RETURN;
END;
$$
LANGUAGE 'plpgsql';

CREATE OR REPLACE FUNCTION closure_over_IS_A(INTEGER) RETURNS SETOF INTEGER AS
$$
DECLARE
    is_a_id           INTEGER;
    r                 RECORD;
BEGIN
    SELECT INTO is_a_id get_cvterm_id_for_is_a();
    FOR r IN SELECT * FROM  _closure_over_IS_A($1, is_a_id) LOOP
        RETURN NEXT r._closure_over_IS_A;
    END LOOP;
    RETURN;
END;
$$
LANGUAGE 'plpgsql';

CREATE OR REPLACE FUNCTION closure_over_reflexive_IS_A(INTEGER) RETURNS SETOF INTEGER AS
$$
DECLARE
    is_a_id           INTEGER;
    r                 RECORD;
BEGIN
    SELECT INTO is_a_id get_cvterm_id_for_is_a();
    FOR r IN SELECT * FROM  _closure_over_IS_A($1, is_a_id)
             UNION SELECT $1 
           LOOP
        RETURN NEXT r._closure_over_IS_A;
    END LOOP;
    RETURN;
END;
$$
LANGUAGE 'plpgsql';

-- implements:
--  X R Z <= X is_a* Y, Y R Z
-- but not:
--  X R Z <= X R Y, Y is_a+ Z
CREATE OR REPLACE FUNCTION _closure_over_relation_with_dist_partial(INTEGER,INTEGER,INTEGER,INTEGER) RETURNS SETOF closure_result AS
$$
DECLARE
    term                ALIAS FOR $1; 
    relation_id         ALIAS FOR $2;
    is_a_id             ALIAS FOR $3;
    depth               ALIAS FOR $4;
    depth2              INTEGER;
    isa_plus_r          RECORD;
    direct_link_r       closure_result%ROWTYPE;
    trans_link_r        closure_result%ROWTYPE;
BEGIN
    depth2 := depth+1;
    -- get ALL is_a* parents
    FOR isa_plus_r IN SELECT * FROM _closure_over_is_a(term,is_a_id) AS cvterm_id UNION SELECT term AS cvterm_id LOOP

       RAISE NOTICE ''is_a %'', isa_plus_r.cvterm_id;
       FOR direct_link_r IN SELECT object_id AS cvterm_id, depth2 AS pathdistance
                     FROM cvterm_relationship WHERE subject_id = isa_plus_r.cvterm_id AND type_id=relation_id LOOP
           RAISE NOTICE ''obj % dist % / % '', direct_link_r.cvterm_id, depth2, direct_link_r.pathdistance;
           -- return direct R parents
           RETURN NEXT direct_link_r;
           -- recursive, self-call
           FOR trans_link_r IN SELECT *
                 FROM _closure_over_relation_with_dist_partial(direct_link_r.cvterm_id, relation_id, is_a_id, depth2) LOOP
               RETURN NEXT trans_link_r;
           END LOOP;
       END LOOP;
    END LOOP;
    RETURN;
END;
$$
LANGUAGE 'plpgsql';

CREATE OR REPLACE FUNCTION closure_over_relation_with_dist(INTEGER,INTEGER) RETURNS SETOF closure_result AS
$$
DECLARE
    is_a_id           INTEGER;
    r                 closure_result%ROWTYPE;
    r_plus_is_a       closure_result%ROWTYPE;
BEGIN
    SELECT INTO is_a_id get_cvterm_id_for_is_a();
    FOR r IN SELECT * FROM  _closure_over_relation_with_dist_partial($1,$2,is_a_id,0) AS cvterm_id LOOP
       FOR r_plus_is_a IN 
           SELECT cvterm_id, r.pathdistance FROM  _closure_over_is_a(r.cvterm_id,is_a_id) AS cvterm_id
           UNION SELECT r.* LOOP
             RETURN NEXT r_plus_is_a;
       END LOOP;
    END LOOP;
    RETURN;
END;
$$
LANGUAGE 'plpgsql';

CREATE OR REPLACE FUNCTION forward_chain() RETURNS INT AS
$$
DECLARE
  num_assertions        INTEGER;
  total_assertions      INTEGER;
BEGIN
  total_assertions := 0;

  PERFORM seed_cvtermpath();

  -- keep adding new facts until exhausted
  LOOP
    SELECT INTO num_assertions forward_chain_iteration();
    total_assertions := total_assertions + num_assertions;

    RAISE LOG 'Asserted % new facts; current_total=%', num_assertions, total_assertions;
    EXIT WHEN num_assertions > 0;
  END LOOP;   
  RETURN total_assertions;
END;
$$ LANGUAGE 'plpgsql';

CREATE OR REPLACE FUNCTION seed_cvtermpath() RETURNS INT AS
$$
DECLARE
  num_assertions        INTEGER;
  is_a_id               INTEGER;
BEGIN
  RAISE LOG 'Seeding cvtermpath';

  -- seed cvtermpath with direct asserted links
  INSERT INTO cvtermpath 
    (subject_id,type_id,object_id,pathdistance)
    SELECT DISTINCT
      link.subject_id,
      link.type_id,
      link.object_id,
      1
     FROM 
      cvterm_relationship AS link
      LEFT JOIN cvtermpath AS p1 USING (subject_id,type_id,object_id) -- exclude pre-populated links
     WHERE  p1.cvtermpath_id IS NULL; -- this is how we do a SELECT MINUS
  GET DIAGNOSTICS num_assertions = ROW_COUNT;
  RAISE LOG 'Seeded cvtermpath with % links', num_assertions;

  SELECT INTO is_a_id get_cvterm_id_for_is_a();
  -- seed cvtermpath with reflexive is_a
  INSERT INTO cvtermpath 
    (subject_id,type_id,object_id,pathdistance)
    SELECT DISTINCT
      t.cvterm_id,
      is_a_id,
      t.cvterm_id,
      0
     FROM 
      cvterm AS t
      LEFT JOIN cvtermpath AS p1 ON 
        (p1.subject_id=t.cvterm_id,p1.type_id=t.is_a_id,p1.object_id=t.cvterm_id) -- exclude pre-populated links
     WHERE  p1.cvtermpath_id IS NULL; -- this is how we do a SELECT MINUS
  GET DIAGNOSTICS num_assertions = ROW_COUNT;
  RAISE LOG 'Seeded cvtermpath with % links', num_assertions;

  RETURN num_assertions;
END;
$$ LANGUAGE 'plpgsql';

CREATE OR REPLACE FUNCTION forward_chain_iteration() RETURNS INT AS
$$
DECLARE
  num_assertions        INTEGER;
  total_assertions      INTEGER;
BEGIN
  total_assertions := 0;

  -- add your own rules here:
  SELECT INTO num_assertions forward_chain_iteration_rule_is_a();
  total_assertions := total_assertions + num_assertions;
  -- end of rules section

  RETURN total_assertions;
END;
$$ LANGUAGE 'plpgsql';

-- IS_A rule
-- X is_a Y, Y is_a Z => X is_a Z
CREATE OR REPLACE FUNCTION forward_chain_iteration_rule_is_a() RETURNS INT AS
$$
DECLARE
    is_a_id           INTEGER;
    num_assertions    INTEGER;
BEGIN
    RAISE LOG 'Applying rule: IS_A';
    SELECT INTO is_a_id get_cvterm_id_for_is_a();
    INSERT INTO cvtermpath 
      (subject_id,type_id,object_id,pathdistance)
      SELECT DISTINCT
        p.subject_id,
        is_a_id,
        link.object_id,
        p.pathdistance+1
       FROM 
        cvtermpath AS p -- extend this
        INNER JOIN cvterm_relationship AS link ON (p.object_id = link.subject_id) -- using this
        LEFT JOIN cvtermpath AS p1 ON (p1.subject_id=p.subject_id AND p1.object_id=link.object_id AND p1.type_id=is_a_id) -- excluding this [pathdist??]
       WHERE  link.type_id=is_a_id 
         AND  p.type_id=is_a_id 
         AND  p1.cvtermpath_id IS NULL; -- exclude existing paths
     
    GET DIAGNOSTICS num_assertions = ROW_COUNT;
    RAISE LOG 'RULE:IS_A iteration produced % links', num_assertions;
    RETURN num_assertions;
END;
$$ LANGUAGE 'plpgsql';
  
-- TRANSITIVE-OVER-IS_A rule
-- X is_a+ Y, Y R Z => X R Z
CREATE OR REPLACE FUNCTION forward_chain_iteration_rule_trans_over_is_a() RETURNS INT AS
$$
DECLARE
    is_a_id           INTEGER;
    num_assertions    INTEGER;
BEGIN
    RAISE LOG 'Applying rule: TRANS_OVER_IS_A';
    SELECT INTO is_a_id get_cvterm_id_for_is_a();
    INSERT INTO cvtermpath 
      (subject_id,type_id,object_id,pathdistance)
      SELECT DISTINCT
        p_is_a.subject_id,
        p_rel.type_id
        p_rel.object_id,
        p_rel.pathdistance+1
       FROM 
        cvtermpath AS p_is_a  -- extend this
        INNER JOIN cvtermpath AS p_rel ON (p_is_a.object_id = p_rel.subject_id) -- using this
        LEFT JOIN cvtermpath AS p1 ON (p1.subject_id=p_is_a.subject_id AND p1.object_id=p_rel.object_id AND p1.type_id=p_rel.type_id) -- excluding this [pathdist??]
       WHERE  p_is_a.type_id=is_a_id 
         AND  p1.cvtermpath_id IS NULL; -- exclude existing paths
     
    GET DIAGNOSTICS num_assertions = ROW_COUNT;
    RAISE LOG 'Applied rule:TRANS_OVER_IS_A iteration produced % links', num_assertions;
    RETURN num_assertions;
END;
$$ LANGUAGE 'plpgsql';