File: godb-bridge.plpgsql

package info (click to toggle)
libchado-perl 1.31-6
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, sid
  • size: 44,716 kB
  • sloc: sql: 282,721; xml: 192,553; perl: 25,524; sh: 102; python: 73; makefile: 57
file content (495 lines) | stat: -rw-r--r-- 13,253 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
CREATE SCHEMA godb;

--Activate this to make this bridge take precedence
--SET SEARCH PATH TO godb,public;
-- (note that placing godb first in the search path means
--  that godb.dbxref takes precedence over public.dbxref)

CREATE TABLE godb.instance_data (
  release_name varchar(255),
  release_type varchar(255),
  release_notes text
);

--- helper views

--view
CREATE OR REPLACE VIEW godb.v_go_acc AS
 SELECT 
  dbxref_id                          AS dbxref_id,
  db.name || ':' || dbxref.accession AS acc
 FROM public.dbxref
  INNER JOIN public.db USING (db_id);
--materialized view
CREATE TABLE godb.go_acc (
  dbxref_id bigint,
  acc text
);
CREATE INDEX go_acc_idx1 ON godb.go_acc (dbxref_id);
CREATE INDEX go_acc_idx2 ON godb.go_acc (acc);
--load
INSERT INTO godb.go_acc SELECT * FROM godb.v_go_acc;

--- dbxref [clash]

--view
CREATE OR REPLACE VIEW godb.v_dbxref AS
 SELECT
  dbxref_id                          AS id,
  db.name                            AS xref_dbname,
  dbxref.accession                   AS xref_key,
  CAST(NULL AS VARCHAR)              AS xref_keytype,
  dbxref.description                 AS xref_desc
 FROM public.dbxref
  INNER JOIN public.db USING (db_id);
--materialized view
CREATE TABLE godb.dbxref (
  id bigint,
  xref_dbname varchar(255),
  xref_key varchar(255),
  xref_keytype varchar(255),
  xref_desc text
);
CREATE INDEX dbxref_idx1 ON godb.dbxref (id);
CREATE INDEX dbxref_idx2 ON godb.dbxref (xref_dbname);
CREATE INDEX dbxref_idx3 ON godb.dbxref (xref_key);
CREATE INDEX dbxref_idx4 ON godb.dbxref (xref_keytype);
CREATE INDEX dbxref_idx5 ON godb.dbxref (xref_desc);
--load
INSERT INTO godb.dbxref SELECT * FROM godb.v_dbxref;


--
--/allen
--

-- db [clash] (note in godb, db is only refered to from association table)

--view
CREATE OR REPLACE VIEW godb.v_db AS
 SELECT
  db_id                                 AS id,
  name                                  AS name,
  name                                  AS fullname,
  CAST(NULL AS VARCHAR)                 AS datatype,
  CAST(NULL AS VARCHAR)                 AS url_syntax
 FROM public.db;
--materialized view
CREATE TABLE godb.db (
  id bigint,
  name varchar(255),
  fullname varchar(255),
  datatype varchar(255),
  url_syntax varchar(255)
);
CREATE INDEX db_idx1 ON godb.db (id);
CREATE INDEX db_idx2 ON godb.db (name);
CREATE INDEX db_idx3 ON godb.db (fullname);
CREATE INDEX db_idx4 ON godb.db (datatype);
CREATE INDEX db_idx5 ON godb.db (url_syntax);
--load
INSERT INTO godb.db SELECT * FROM godb.v_db;
  
--- term

--view
CREATE OR REPLACE VIEW godb.v_term AS
SELECT
 cvterm_id     AS id,
 godb.go_acc.acc AS acc,
 name          AS name,
 is_obsolete   AS is_obsolete,
 0             AS is_root
FROM public.cvterm
 INNER JOIN godb.go_acc USING (dbxref_id);
--materialized_view
CREATE TABLE godb.term (
  id bigint,
  acc text,
  name varchar(1024),
  is_obsolete int,
  is_root int
);
CREATE INDEX term_idx1 ON godb.term (id);
CREATE INDEX term_idx2 ON godb.term (acc);
CREATE INDEX term_idx3 ON godb.term (name);
CREATE INDEX term_idx4 ON godb.term (is_obsolete);
CREATE INDEX term_idx5 ON godb.term (is_root);
--load
INSERT INTO godb.term SELECT * FROM godb.v_term;
UPDATE godb.term SET is_root = 1 WHERE id IN (SELECT cvterm_id FROM public.cvterm WHERE cvterm_id NOT IN (SELECT DISTINCT subject_id FROM public.cvterm_relationship) AND is_obsolete = 0 AND is_relationshiptype = 0);

--- term_definition
--view
CREATE OR REPLACE VIEW godb.term_definition AS
SELECT
 cvterm_id AS term_id,
 definition AS term_definition
FROM public.cvterm
WHERE definition IS NOT NULL;

--- term_dbxref
--view
CREATE OR REPLACE VIEW godb.term_dbxref AS
SELECT
 cvterm_id AS term_id,
 dbxref_id  AS dbxref_id,
 0 AS is_for_definition
FROM public.cvterm_dbxref;

--- term_synonym
--view
CREATE OR REPLACE VIEW godb.term_synonym AS
SELECT
 cvterm_id AS term_id,
 synonym  AS term_synonym,
 type_id AS synonym_type_id
FROM public.cvtermsynonym;

--- term2term
--view
CREATE OR REPLACE VIEW godb.v_term2term AS
SELECT
 cvterm_relationship_id AS id,
 type_id           AS relationship_type_id,
 object_id         AS term1_id,
 subject_id        AS term2_id
FROM public.cvterm_relationship;
--materialized view
CREATE TABLE godb.term2term (
  id bigint,
  relationship_type_id bigint,
  term1_id bigint,
  term2_id bigint
);
CREATE INDEX term2term_idx1 ON godb.term2term (id);
CREATE INDEX term2term_idx2 ON godb.term2term (relationship_type_id);
CREATE INDEX term2term_idx3 ON godb.term2term (term1_id);
CREATE INDEX term2term_idx4 ON godb.term2term (term2_id);
--load
INSERT INTO godb.term2term SELECT * FROM godb.v_term2term;

--- graph_path
--view
CREATE OR REPLACE VIEW godb.v_graph_path AS
SELECT
 cvtermpath_id      AS id,
 object_id      AS term1_id,
 subject_id     AS term2_id,
 pathdistance   AS distance
FROM public.cvtermpath;
--materialized view
CREATE TABLE godb.graph_path (
  id bigint,
  term1_id bigint,
  term2_id bigint,
  distance int 
);
CREATE INDEX graph_path_idx1 ON godb.graph_path (id);
CREATE INDEX graph_path_idx2 ON godb.graph_path (term1_id);
CREATE INDEX graph_path_idx3 ON godb.graph_path (term2_id);
CREATE INDEX graph_path_idx4 ON godb.graph_path (distance);
--load
INSERT INTO godb.graph_path SELECT * FROM godb.v_graph_path;

--Activate this to make this bridge take precedence
--SET SEARCH PATH TO godb,public;

-- species
--view
CREATE OR REPLACE VIEW godb.species AS
 SELECT
  organism_id                        AS id,
--FIXME this cast does not work
--  CAST(accession AS BIGINT)             AS ncbi_taxa_id,
  accession                          AS ncbi_taxa_id,
  common_name,
  CAST(NULL AS VARCHAR)              AS lineage_string,
  genus,
  species
 FROM
  public.organism
  INNER JOIN public.organism_dbxref USING (organism_id)
  INNER JOIN public.dbxref USING (dbxref_id) --schema correct?
  INNER JOIN public.db     USING (db_id) --schema correct?
 WHERE db.name='NCBITaxon';

-- gene_product
--  note: secondary_species_id; eg for host species with parasite
--        this will be handled by a feature_relationship in chado
--view
CREATE OR REPLACE VIEW godb.gene_product AS
 SELECT
  feature_id                    AS id,
  name                          AS symbol,
  dbxref_id                     AS dbxref_id,
  organism_id                   AS species_id,
  CAST(NULL AS BIGINT)          AS secondary_species_id,
  type_id,
-- todo: make this a left outer join on a featureprop
  name                          AS fullname
 FROM public.feature;

-- gene_product_synonym
--view
CREATE OR REPLACE VIEW godb.gene_product_synonym AS
 SELECT
  feature_id                    AS gene_product_id,
  synonym.name                  AS product_synonym
 FROM
  public.feature_synonym INNER JOIN public.synonym USING (synonym_id);

-- gene_product_property
--view
CREATE OR REPLACE VIEW godb.gene_product_property AS
 SELECT
  feature_id                    AS gene_product_id,
  prop.name                     AS property_key,
  value                         AS property_value
 FROM
  public.featureprop INNER JOIN public.cvterm AS prop ON (type_id=cvterm_id);

-- association
--view
CREATE OR REPLACE VIEW godb.association AS
 SELECT
  feature_cvterm_id             AS id,
  cvterm_id                     AS term_id,
  feature_id                    AS gene_product_id,
  is_not                        AS is_not,
-- not used in godb; maybe never will be
  0                             AS role_group,
-- lets be lazy for now; these are actually feature_cvtermprops
  0                             AS assocdate,
  0                             AS source_db_id
 FROM public.feature_cvterm;

-- association_qualifier
--view
CREATE OR REPLACE VIEW godb.association_qualifier AS
 SELECT
  feature_cvtermprop_id         AS id,
  feature_cvterm_id             AS association_id,
  type_id                       AS term_id,
  value                         AS value
 FROM public.feature_cvtermprop;

-- evidence
-- TODO!!
--view
CREATE OR REPLACE VIEW godb.evidence AS
 SELECT
  feature_cvtermprop_id         AS id,
  feature_cvterm_id             AS association_id,
  type_id                       AS term_id,
  value                         AS value
 FROM public.feature_cvtermprop
 INNER JOIN public.feature_cvterm_dbxref USING (feature_cvterm_id);

-- evidence_dbxref
-- TODO
--view
CREATE OR REPLACE VIEW godb.evidence_dbxref AS
 SELECT
  feature_cvterm_id         AS id,
  feature_cvterm_id             AS association_id,
  type_id                       AS term_id,
  value                         AS value
 FROM public.feature_cvtermprop
 INNER JOIN public.feature_cvterm_dbxref USING (feature_cvterm_id);

-- seq
--view
CREATE OR REPLACE VIEW godb.seq AS
 SELECT
  feature_id                    AS id,
  feature.name                  AS display_id,
  feature.name                  AS description,
  residues                      AS seq,
  seqlen                        AS seq_len,
  md5checksum,
  type.name                     AS moltype,
-- this is never used
  0                             AS timestamp
 FROM public.feature INNER JOIN public.cvterm AS type ON (type_id=cvterm_id);

-- seq_property [not used?]
--view
CREATE OR REPLACE VIEW godb.seq_property AS
 SELECT
  *
 FROM public.feature WHERE NULL;

-- seq_dbxref
-- [in GO this typically has interpro ids, etc; diff semantics here]
--view
CREATE OR REPLACE VIEW godb.seq_dbxref AS
 SELECT 
  feature_id                    AS seq_id,
  dbxref_id
 FROM public.feature_dbxref;

-- gene_product_seq
-- 1:{0,1} relationship between features and seqs in chado
--view
CREATE OR REPLACE VIEW godb.gene_product_seq AS
 SELECT
  feature_id              AS gene_product_id,
  feature_id              AS seq_id
 FROM public.feature
 WHERE feature.residues IS NOT NULL;

-- gene_product_count
-- this is an OPTIONAL godb optimization
--  simulate a rowless table for now
--view
CREATE OR REPLACE VIEW godb.gene_product_count AS
 SELECT
  *
 FROM public.feature WHERE NULL;



-- ************************************************************
-- UPDATE RULES
-- ************************************************************
-- NOT COMPLETE

-- we don't really need these as loading is easy enough
-- to do with xslts

CREATE RULE "_RuleI_term_definition" AS
 ON INSERT TO godb.term_definition
 DO INSTEAD
  UPDATE public.cvterm
  SET
  definition = NEW.term_definition
  WHERE cvterm_id = NEW.term_id;

CREATE RULE "_RuleU_term_definition" AS
 ON UPDATE TO godb.term_definition
 DO INSTEAD
  UPDATE public.cvterm
  SET
  definition = NEW.term_definition
  WHERE cvterm_id = OLD.term_id;

CREATE RULE "_RuleD_term_definition" AS
 ON DELETE TO godb.term_definition
 DO INSTEAD
  UPDATE public.cvterm
  SET
  definition = NULL
  WHERE cvterm_id = OLD.term_id;

CREATE RULE "_RuleI_term2term" AS
 ON INSERT TO godb.term2term
 DO INSTEAD
  INSERT INTO public.cvterm_relationship
  (
   type_id,
   object_id,
   subject_id)
  VALUES
  (
   NEW.relationship_type_id, 
   NEW.term1_id,
   NEW.term2_id
  );

CREATE RULE "_RuleU_term2term" AS
 ON UPDATE TO godb.term2term
 DO INSTEAD
  UPDATE public.cvterm_relationship
  SET
 type_id        = NEW.relationship_type_id,
 object_id        = NEW.term1_id,
 subject_id       = NEW.term2_id
  WHERE cvterm_relationship_id = OLD.id;

CREATE RULE "_RuleD_term2term" AS
 ON DELETE TO godb.term2term
 DO INSTEAD
  DELETE FROM public.cvterm_relationship
  WHERE cvterm_relationship_id = OLD.id;

CREATE RULE "_RuleI_graph_path" AS
 ON INSERT TO godb.graph_path
 DO INSTEAD
  INSERT INTO public.cvtermpath
  (
   type_id,
   object_id,
   subject_id,
   pathdistance)
  VALUES
  (
   NULL,
   NEW.term1_id,
   NEW.term2_id,
   NEW.distance
  );

CREATE RULE "_RuleU_graph_path" AS
 ON UPDATE TO godb.graph_path
 DO INSTEAD
  UPDATE public.cvtermpath
  SET
 type_id        = NULL,
 object_id        = NEW.term1_id,
 subject_id       = NEW.term2_id,
 pathdistance      = NEW.distance
  WHERE cvtermpath_id = OLD.id;

CREATE RULE "_RuleD_graph_path" AS
 ON DELETE TO godb.graph_path
 DO INSTEAD
  DELETE FROM public.cvtermpath
  WHERE cvtermpath_id = OLD.id;


CREATE RULE "_RuleI_term_synonym" AS
 ON INSERT TO godb.term_synonym
 DO INSTEAD
  INSERT INTO public.cvtermsynonym
  (cvterm_id, synonym)
  VALUES
  (NEW.term_id, NEW.term_synonym);

CREATE RULE "_RuleU_term_synonym" AS
 ON UPDATE TO godb.term_synonym
 DO INSTEAD
  UPDATE public.cvtermsynonym
  SET
 cvterm_id = NEW.term_id,
 synonym  = NEW.term_synonym
  WHERE cvterm_id = OLD.term_id AND synonym = OLD.term_synonym;

CREATE RULE "_RuleD_term_synonym" AS
 ON DELETE TO godb.term_synonym
 DO INSTEAD
  DELETE FROM public.cvtermsynonym
  WHERE cvterm_id = OLD.term_id AND synonym = OLD.term_synonym;

CREATE RULE "_RuleI_term_dbxref" AS
 ON INSERT TO godb.term_dbxref
 DO INSTEAD
  INSERT INTO public.cvterm_dbxref
  (cvterm_id, dbxref_id)
  VALUES
  (NEW.term_id, NEW.dbxref_id);

CREATE RULE "_RuleU_term_dbxref" AS
 ON UPDATE TO godb.term_dbxref
 DO INSTEAD
  UPDATE public.cvterm_dbxref
  SET
 cvterm_id = NEW.term_id,
 dbxref_id  = NEW.dbxref_id
  WHERE cvterm_id = OLD.term_id AND dbxref_id = OLD.dbxref_id;

CREATE RULE "_RuleD_term_dbxref" AS
 ON DELETE TO godb.term_dbxref
 DO INSTEAD
  DELETE FROM public.cvterm_dbxref
  WHERE cvterm_id = OLD.term_id AND dbxref_id = OLD.dbxref_id;