1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262
|
CREATE TYPE closure_result AS (cvterm_id INTEGER, pathdistance INTEGER);
-- results may not be distinct
CREATE OR REPLACE FUNCTION _closure_over_IS_A(INTEGER, INTEGER) RETURNS SETOF INTEGER AS
$$
DECLARE
term ALIAS FOR $1;
is_a_id ALIAS FOR $2;
inner_row RECORD;
outer_row RECORD;
BEGIN
-- get direct is_a parents
FOR inner_row IN SELECT object_id AS cvterm_id
FROM cvterm_relationship WHERE subject_id = term AND type_id=is_a_id LOOP
-- return direct is_a parents
RETURN NEXT inner_row.cvterm_id;
-- recursive, self-call
FOR outer_row IN SELECT * FROM _closure_over_IS_A(inner_row.cvterm_id, is_a_id) LOOP
RETURN NEXT outer_row._closure_over_IS_A;
END LOOP;
END LOOP;
RETURN;
END;
$$
LANGUAGE 'plpgsql';
CREATE OR REPLACE FUNCTION closure_over_IS_A(INTEGER) RETURNS SETOF INTEGER AS
$$
DECLARE
is_a_id INTEGER;
r RECORD;
BEGIN
SELECT INTO is_a_id get_cvterm_id_for_is_a();
FOR r IN SELECT * FROM _closure_over_IS_A($1, is_a_id) LOOP
RETURN NEXT r._closure_over_IS_A;
END LOOP;
RETURN;
END;
$$
LANGUAGE 'plpgsql';
CREATE OR REPLACE FUNCTION closure_over_reflexive_IS_A(INTEGER) RETURNS SETOF INTEGER AS
$$
DECLARE
is_a_id INTEGER;
r RECORD;
BEGIN
SELECT INTO is_a_id get_cvterm_id_for_is_a();
FOR r IN SELECT * FROM _closure_over_IS_A($1, is_a_id)
UNION SELECT $1
LOOP
RETURN NEXT r._closure_over_IS_A;
END LOOP;
RETURN;
END;
$$
LANGUAGE 'plpgsql';
-- implements:
-- X R Z <= X is_a* Y, Y R Z
-- but not:
-- X R Z <= X R Y, Y is_a+ Z
CREATE OR REPLACE FUNCTION _closure_over_relation_with_dist_partial(INTEGER,INTEGER,INTEGER,INTEGER) RETURNS SETOF closure_result AS
$$
DECLARE
term ALIAS FOR $1;
relation_id ALIAS FOR $2;
is_a_id ALIAS FOR $3;
depth ALIAS FOR $4;
depth2 INTEGER;
isa_plus_r RECORD;
direct_link_r closure_result%ROWTYPE;
trans_link_r closure_result%ROWTYPE;
BEGIN
depth2 := depth+1;
-- get ALL is_a* parents
FOR isa_plus_r IN SELECT * FROM _closure_over_is_a(term,is_a_id) AS cvterm_id UNION SELECT term AS cvterm_id LOOP
RAISE NOTICE ''is_a %'', isa_plus_r.cvterm_id;
FOR direct_link_r IN SELECT object_id AS cvterm_id, depth2 AS pathdistance
FROM cvterm_relationship WHERE subject_id = isa_plus_r.cvterm_id AND type_id=relation_id LOOP
RAISE NOTICE ''obj % dist % / % '', direct_link_r.cvterm_id, depth2, direct_link_r.pathdistance;
-- return direct R parents
RETURN NEXT direct_link_r;
-- recursive, self-call
FOR trans_link_r IN SELECT *
FROM _closure_over_relation_with_dist_partial(direct_link_r.cvterm_id, relation_id, is_a_id, depth2) LOOP
RETURN NEXT trans_link_r;
END LOOP;
END LOOP;
END LOOP;
RETURN;
END;
$$
LANGUAGE 'plpgsql';
CREATE OR REPLACE FUNCTION closure_over_relation_with_dist(INTEGER,INTEGER) RETURNS SETOF closure_result AS
$$
DECLARE
is_a_id INTEGER;
r closure_result%ROWTYPE;
r_plus_is_a closure_result%ROWTYPE;
BEGIN
SELECT INTO is_a_id get_cvterm_id_for_is_a();
FOR r IN SELECT * FROM _closure_over_relation_with_dist_partial($1,$2,is_a_id,0) AS cvterm_id LOOP
FOR r_plus_is_a IN
SELECT cvterm_id, r.pathdistance FROM _closure_over_is_a(r.cvterm_id,is_a_id) AS cvterm_id
UNION SELECT r.* LOOP
RETURN NEXT r_plus_is_a;
END LOOP;
END LOOP;
RETURN;
END;
$$
LANGUAGE 'plpgsql';
CREATE OR REPLACE FUNCTION forward_chain() RETURNS INT AS
$$
DECLARE
num_assertions INTEGER;
total_assertions INTEGER;
BEGIN
total_assertions := 0;
PERFORM seed_cvtermpath();
-- keep adding new facts until exhausted
LOOP
SELECT INTO num_assertions forward_chain_iteration();
total_assertions := total_assertions + num_assertions;
RAISE LOG 'Asserted % new facts; current_total=%', num_assertions, total_assertions;
EXIT WHEN num_assertions > 0;
END LOOP;
RETURN total_assertions;
END;
$$ LANGUAGE 'plpgsql';
CREATE OR REPLACE FUNCTION seed_cvtermpath() RETURNS INT AS
$$
DECLARE
num_assertions INTEGER;
is_a_id INTEGER;
BEGIN
RAISE LOG 'Seeding cvtermpath';
-- seed cvtermpath with direct asserted links
INSERT INTO cvtermpath
(subject_id,type_id,object_id,pathdistance)
SELECT DISTINCT
link.subject_id,
link.type_id,
link.object_id,
1
FROM
cvterm_relationship AS link
LEFT JOIN cvtermpath AS p1 USING (subject_id,type_id,object_id) -- exclude pre-populated links
WHERE p1.cvtermpath_id IS NULL; -- this is how we do a SELECT MINUS
GET DIAGNOSTICS num_assertions = ROW_COUNT;
RAISE LOG 'Seeded cvtermpath with % links', num_assertions;
SELECT INTO is_a_id get_cvterm_id_for_is_a();
-- seed cvtermpath with reflexive is_a
INSERT INTO cvtermpath
(subject_id,type_id,object_id,pathdistance)
SELECT DISTINCT
t.cvterm_id,
is_a_id,
t.cvterm_id,
0
FROM
cvterm AS t
LEFT JOIN cvtermpath AS p1 ON
(p1.subject_id=t.cvterm_id,p1.type_id=t.is_a_id,p1.object_id=t.cvterm_id) -- exclude pre-populated links
WHERE p1.cvtermpath_id IS NULL; -- this is how we do a SELECT MINUS
GET DIAGNOSTICS num_assertions = ROW_COUNT;
RAISE LOG 'Seeded cvtermpath with % links', num_assertions;
RETURN num_assertions;
END;
$$ LANGUAGE 'plpgsql';
CREATE OR REPLACE FUNCTION forward_chain_iteration() RETURNS INT AS
$$
DECLARE
num_assertions INTEGER;
total_assertions INTEGER;
BEGIN
total_assertions := 0;
-- add your own rules here:
SELECT INTO num_assertions forward_chain_iteration_rule_is_a();
total_assertions := total_assertions + num_assertions;
-- end of rules section
RETURN total_assertions;
END;
$$ LANGUAGE 'plpgsql';
-- IS_A rule
-- X is_a Y, Y is_a Z => X is_a Z
CREATE OR REPLACE FUNCTION forward_chain_iteration_rule_is_a() RETURNS INT AS
$$
DECLARE
is_a_id INTEGER;
num_assertions INTEGER;
BEGIN
RAISE LOG 'Applying rule: IS_A';
SELECT INTO is_a_id get_cvterm_id_for_is_a();
INSERT INTO cvtermpath
(subject_id,type_id,object_id,pathdistance)
SELECT DISTINCT
p.subject_id,
is_a_id,
link.object_id,
p.pathdistance+1
FROM
cvtermpath AS p -- extend this
INNER JOIN cvterm_relationship AS link ON (p.object_id = link.subject_id) -- using this
LEFT JOIN cvtermpath AS p1 ON (p1.subject_id=p.subject_id AND p1.object_id=link.object_id AND p1.type_id=is_a_id) -- excluding this [pathdist??]
WHERE link.type_id=is_a_id
AND p.type_id=is_a_id
AND p1.cvtermpath_id IS NULL; -- exclude existing paths
GET DIAGNOSTICS num_assertions = ROW_COUNT;
RAISE LOG 'RULE:IS_A iteration produced % links', num_assertions;
RETURN num_assertions;
END;
$$ LANGUAGE 'plpgsql';
-- TRANSITIVE-OVER-IS_A rule
-- X is_a+ Y, Y R Z => X R Z
CREATE OR REPLACE FUNCTION forward_chain_iteration_rule_trans_over_is_a() RETURNS INT AS
$$
DECLARE
is_a_id INTEGER;
num_assertions INTEGER;
BEGIN
RAISE LOG 'Applying rule: TRANS_OVER_IS_A';
SELECT INTO is_a_id get_cvterm_id_for_is_a();
INSERT INTO cvtermpath
(subject_id,type_id,object_id,pathdistance)
SELECT DISTINCT
p_is_a.subject_id,
p_rel.type_id
p_rel.object_id,
p_rel.pathdistance+1
FROM
cvtermpath AS p_is_a -- extend this
INNER JOIN cvtermpath AS p_rel ON (p_is_a.object_id = p_rel.subject_id) -- using this
LEFT JOIN cvtermpath AS p1 ON (p1.subject_id=p_is_a.subject_id AND p1.object_id=p_rel.object_id AND p1.type_id=p_rel.type_id) -- excluding this [pathdist??]
WHERE p_is_a.type_id=is_a_id
AND p1.cvtermpath_id IS NULL; -- exclude existing paths
GET DIAGNOSTICS num_assertions = ROW_COUNT;
RAISE LOG 'Applied rule:TRANS_OVER_IS_A iteration produced % links', num_assertions;
RETURN num_assertions;
END;
$$ LANGUAGE 'plpgsql';
|