1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380
|
--Note that the so schema is left out of this diff
ALTER TABLE feature ALTER residues SET STORAGE EXTERNAL;
COMMENT ON COLUMN feature.residues IS 'A sequence of alphabetic characters
representing biological residues (nucleic acids, amino acids). This
column does not need to be manifested for all features; it is optional
for features such as exons where the residues can be derived from the
featureloc. It is recommended that the value for this column be
manifested for features which may may non-contiguous sublocations (e.g.
transcripts), since derivation at query time is non-trivial. For
expressed sequence, the DNA sequence should be used rather than the
RNA sequence. The default storage method for the residues column is
EXTERNAL, which will store it uncompressed to make substring operations
faster.';
ALTER TABLE feature_synonym ALTER COLUMN is_current SET DEFAULT 'false';
CREATE OR REPLACE FUNCTION share_exons () RETURNS void AS '
DECLARE
BEGIN
/* Generate a table of shared exons */
CREATE temporary TABLE shared_exons AS
SELECT gene.feature_id as gene_feature_id
, gene.uniquename as gene_uniquename
, transcript1.uniquename as transcript1
, exon1.feature_id as exon1_feature_id
, exon1.uniquename as exon1_uniquename
, transcript2.uniquename as transcript2
, exon2.feature_id as exon2_feature_id
, exon2.uniquename as exon2_uniquename
, exon1_loc.fmin /* = exon2_loc.fmin */
, exon1_loc.fmax /* = exon2_loc.fmax */
FROM feature gene
JOIN cvterm gene_type ON gene.type_id = gene_type.cvterm_id
JOIN cv gene_type_cv USING (cv_id)
JOIN feature_relationship gene_transcript1 ON gene.feature_id = gene_transcript1.object_id
JOIN feature transcript1 ON gene_transcript1.subject_id = transcript1.feature_id
JOIN cvterm transcript1_type ON transcript1.type_id = transcript1_type.cvterm_id
JOIN cv transcript1_type_cv ON transcript1_type.cv_id = transcript1_type_cv.cv_id
JOIN feature_relationship transcript1_exon1 ON transcript1_exon1.object_id = transcript1.feature_id
JOIN feature exon1 ON transcript1_exon1.subject_id = exon1.feature_id
JOIN cvterm exon1_type ON exon1.type_id = exon1_type.cvterm_id
JOIN cv exon1_type_cv ON exon1_type.cv_id = exon1_type_cv.cv_id
JOIN featureloc exon1_loc ON exon1_loc.feature_id = exon1.feature_id
JOIN feature_relationship gene_transcript2 ON gene.feature_id = gene_transcript2.object_id
JOIN feature transcript2 ON gene_transcript2.subject_id = transcript2.feature_id
JOIN cvterm transcript2_type ON transcript2.type_id = transcript2_type.cvterm_id
JOIN cv transcript2_type_cv ON transcript2_type.cv_id = transcript2_type_cv.cv_id
JOIN feature_relationship transcript2_exon2 ON transcript2_exon2.object_id = transcript2.feature_id
JOIN feature exon2 ON transcript2_exon2.subject_id = exon2.feature_id
JOIN cvterm exon2_type ON exon2.type_id = exon2_type.cvterm_id
JOIN cv exon2_type_cv ON exon2_type.cv_id = exon2_type_cv.cv_id
JOIN featureloc exon2_loc ON exon2_loc.feature_id = exon2.feature_id
WHERE gene_type_cv.name = ''sequence''
AND gene_type.name = ''gene''
AND transcript1_type_cv.name = ''sequence''
AND transcript1_type.name = ''mRNA''
AND transcript2_type_cv.name = ''sequence''
AND transcript2_type.name = ''mRNA''
AND exon1_type_cv.name = ''sequence''
AND exon1_type.name = ''exon''
AND exon2_type_cv.name = ''sequence''
AND exon2_type.name = ''exon''
AND exon1.feature_id < exon2.feature_id
AND exon1_loc.rank = 0
AND exon2_loc.rank = 0
AND exon1_loc.fmin = exon2_loc.fmin
AND exon1_loc.fmax = exon2_loc.fmax
;
CREATE temporary TABLE canonical_exon_representatives AS
SELECT gene_feature_id, min(exon1_feature_id) AS canonical_feature_id, fmin
FROM shared_exons
GROUP BY gene_feature_id,fmin
;
CREATE temporary TABLE exon_replacements AS
SELECT DISTINCT shared_exons.exon2_feature_id AS actual_feature_id
, canonical_exon_representatives.canonical_feature_id
, canonical_exon_representatives.fmin
FROM shared_exons
JOIN canonical_exon_representatives USING (gene_feature_id)
WHERE shared_exons.exon2_feature_id <> canonical_exon_representatives.canonical_feature_id
AND shared_exons.fmin = canonical_exon_representatives.fmin
;
UPDATE feature_relationship
SET subject_id = (
SELECT canonical_feature_id
FROM exon_replacements
WHERE feature_relationship.subject_id = exon_replacements.actual_feature_id)
WHERE subject_id IN (
SELECT actual_feature_id FROM exon_replacements
);
UPDATE feature_relationship
SET object_id = (
SELECT canonical_feature_id
FROM exon_replacements
WHERE feature_relationship.subject_id = exon_replacements.actual_feature_id)
WHERE object_id IN (
SELECT actual_feature_id FROM exon_replacements
);
UPDATE feature
SET is_obsolete = true
WHERE feature_id IN (
SELECT actual_feature_id FROM exon_replacements
);
END;
' LANGUAGE 'plpgsql';
CREATE OR REPLACE FUNCTION order_exons (integer) RETURNS void AS '
DECLARE
parent_type ALIAS FOR $1;
exon_id int;
part_of int;
exon_type int;
strand int;
arow RECORD;
order_by varchar;
rowcount int;
exon_count int;
ordered_exons int;
transcript_id int;
transcript_row feature%ROWTYPE;
BEGIN
SELECT INTO part_of cvterm_id FROM cvterm WHERE name=''part_of''
AND cv_id IN (SELECT cv_id FROM cv WHERE name=''relationship'');
--SELECT INTO exon_type cvterm_id FROM cvterm WHERE name=''exon''
-- AND cv_id IN (SELECT cv_id FROM cv WHERE name=''sequence'');
--RAISE NOTICE ''part_of %, exon %'',part_of,exon_type;
FOR transcript_row IN
SELECT * FROM feature WHERE type_id = parent_type
LOOP
transcript_id = transcript_row.feature_id;
SELECT INTO rowcount count(*) FROM feature_relationship
WHERE object_id = transcript_id
AND rank = 0;
--Dont modify this transcript if there are already numbered exons or
--if there is only one exon
IF rowcount = 1 THEN
--RAISE NOTICE ''skipping transcript %, row count %'',transcript_id,rowcount;
CONTINUE;
END IF;
--need to reverse the order if the strand is negative
SELECT INTO strand strand FROM featureloc WHERE feature_id=transcript_id;
IF strand > 0 THEN
order_by = ''fl.fmin'';
ELSE
order_by = ''fl.fmax desc'';
END IF;
exon_count = 0;
FOR arow IN EXECUTE
''SELECT fr.*, fl.fmin, fl.fmax
FROM feature_relationship fr, featureloc fl
WHERE fr.object_id = ''||transcript_id||''
AND fr.subject_id = fl.feature_id
AND fr.type_id = ''||part_of||''
ORDER BY ''||order_by
LOOP
--number the exons for a given transcript
UPDATE feature_relationship
SET rank = exon_count
WHERE feature_relationship_id = arow.feature_relationship_id;
exon_count = exon_count + 1;
END LOOP;
END LOOP;
END;
' LANGUAGE 'plpgsql';
ALTER TABLE analysisprop ADD COLUMN rank int not null default 0;
CREATE TABLE analysisfeatureprop (
analysisfeatureprop_id SERIAL PRIMARY KEY,
analysisfeature_id INTEGER NOT NULL REFERENCES analysisfeature(analysisfeature_id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
type_id INTEGER NOT NULL REFERENCES cvterm(cvterm_id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
value TEXT,
rank INTEGER NOT NULL,
CONSTRAINT analysisfeature_id_type_id_rank UNIQUE(analysisfeature_id, type_id, rank)
);
ALTER TABLE phenotype_comparison_cvterm ADD CONSTRAINT phenotype_comparison_cvterm_pub_id_fkey FOREIGN KEY (pub_id) REFERENCES pub(pub_id) ON DELETE CASCADE;
create table cell_line (
cell_line_id serial not null,
primary key (cell_line_id),
name varchar(255) null,
uniquename varchar(255) not null,
organism_id int not null,
foreign key (organism_id) references organism (organism_id) on delete cascade INITIALLY DEFERRED,
timeaccessioned timestamp not null default current_timestamp,
timelastmodified timestamp not null default current_timestamp,
constraint cell_line_c1 unique (uniquename, organism_id)
);
grant all on cell_line to PUBLIC;
create table cell_line_relationship (
cell_line_relationship_id serial not null,
primary key (cell_line_relationship_id),
subject_id int not null,
foreign key (subject_id) references cell_line (cell_line_id) on delete cascade INITIALLY DEFERRED,
object_id int not null,
foreign key (object_id) references cell_line (cell_line_id) on delete cascade INITIALLY DEFERRED,
type_id int not null,
foreign key (type_id) references cvterm (cvterm_id) on delete cascade INITIALLY DEFERRED,
constraint cell_line_relationship_c1 unique (subject_id, object_id, type_id)
);
grant all on cell_line_relationship to PUBLIC;
create table cell_line_synonym (
cell_line_synonym_id serial not null,
primary key (cell_line_synonym_id),
cell_line_id int not null,
foreign key (cell_line_id) references cell_line (cell_line_id) on delete cascade INITIALLY DEFERRED,
synonym_id int not null,
foreign key (synonym_id) references synonym (synonym_id) on delete cascade INITIALLY DEFERRED,
pub_id int not null,
foreign key (pub_id) references pub (pub_id) on delete cascade INITIALLY DEFERRED,
is_current boolean not null default 'false',
is_internal boolean not null default 'false',
constraint cell_line_synonym_c1 unique (synonym_id,cell_line_id,pub_id)
);
grant all on cell_line_synonym to PUBLIC;
create table cell_line_cvterm (
cell_line_cvterm_id serial not null,
primary key (cell_line_cvterm_id),
cell_line_id int not null,
foreign key (cell_line_id) references cell_line (cell_line_id) on delete cascade INITIALLY DEFERRED,
cvterm_id int not null,
foreign key (cvterm_id) references cvterm (cvterm_id) on delete cascade INITIALLY DEFERRED,
pub_id int not null,
foreign key (pub_id) references pub (pub_id) on delete cascade INITIALLY DEFERRED,
rank int not null default 0,
constraint cell_line_cvterm_c1 unique (cell_line_id,cvterm_id,pub_id,rank)
);
grant all on cell_line_cvterm to PUBLIC;
create table cell_line_dbxref (
cell_line_dbxref_id serial not null,
primary key (cell_line_dbxref_id),
cell_line_id int not null,
foreign key (cell_line_id) references cell_line (cell_line_id) on delete cascade INITIALLY DEFERRED,
dbxref_id int not null,
foreign key (dbxref_id) references dbxref (dbxref_id) on delete cascade INITIALLY DEFERRED,
is_current boolean not null default 'true',
constraint cell_line_dbxref_c1 unique (cell_line_id,dbxref_id)
);
grant all on cell_line_dbxref to PUBLIC;
create table cell_lineprop (
cell_lineprop_id serial not null,
primary key (cell_lineprop_id),
cell_line_id int not null,
foreign key (cell_line_id) references cell_line (cell_line_id) on delete cascade INITIALLY DEFERRED,
type_id int not null,
foreign key (type_id) references cvterm (cvterm_id) on delete cascade INITIALLY DEFERRED,
value text null,
rank int not null default 0,
constraint cell_lineprop_c1 unique (cell_line_id,type_id,rank)
);
grant all on cell_lineprop to PUBLIC;
create table cell_lineprop_pub (
cell_lineprop_pub_id serial not null,
primary key (cell_lineprop_pub_id),
cell_lineprop_id int not null,
foreign key (cell_lineprop_id) references cell_lineprop (cell_lineprop_id) on delete cascade INITIALLY DEFERRED,
pub_id int not null,
foreign key (pub_id) references pub (pub_id) on delete cascade INITIALLY DEFERRED,
constraint cell_lineprop_pub_c1 unique (cell_lineprop_id,pub_id)
);
grant all on cell_lineprop_pub to PUBLIC;
create table cell_line_feature (
cell_line_feature_id serial not null,
primary key (cell_line_feature_id),
cell_line_id int not null,
foreign key (cell_line_id) references cell_line (cell_line_id) on delete cascade INITIALLY DEFERRED,
feature_id int not null,
foreign key (feature_id) references feature (feature_id) on delete cascade INITIALLY DEFERRED,
pub_id int not null,
foreign key (pub_id) references pub (pub_id) on delete cascade INITIALLY DEFERRED,
constraint cell_line_feature_c1 unique (cell_line_id, feature_id, pub_id)
);
grant all on cell_line_feature to PUBLIC;
create table cell_line_cvtermprop (
cell_line_cvtermprop_id serial not null,
primary key (cell_line_cvtermprop_id),
cell_line_cvterm_id int not null,
foreign key (cell_line_cvterm_id) references cell_line_cvterm (cell_line_cvterm_id) on delete cascade INITIALLY DEFERRED,
type_id int not null,
foreign key (type_id) references cvterm (cvterm_id) on delete cascade INITIALLY DEFERRED,
value text null,
rank int not null default 0,
constraint cell_line_cvtermprop_c1 unique (cell_line_cvterm_id, type_id, rank)
);
grant all on cell_line_cvtermprop to PUBLIC;
create table cell_line_pub (
cell_line_pub_id serial not null,
primary key (cell_line_pub_id),
cell_line_id int not null,
foreign key (cell_line_id) references cell_line (cell_line_id) on delete cascade INITIALLY DEFERRED,
pub_id int not null,
foreign key (pub_id) references pub (pub_id) on delete cascade INITIALLY DEFERRED,
constraint cell_line_pub_c1 unique (cell_line_id, pub_id)
);
grant all on cell_line_pub to PUBLIC;
create table cell_line_library (
cell_line_library_id serial not null,
primary key (cell_line_library_id),
cell_line_id int not null,
foreign key (cell_line_id) references cell_line (cell_line_id) on delete cascade INITIALLY DEFERRED,
library_id int not null,
foreign key (library_id) references library (library_id) on delete cascade INITIALLY DEFERRED,
pub_id int not null,
foreign key (pub_id) references pub (pub_id) on delete cascade INITIALLY DEFERRED,
constraint cell_line_library_c1 unique (cell_line_id, library_id, pub_id)
);
grant all on cell_line_library to PUBLIC;
CREATE OR REPLACE VIEW gff3view (
feature_id, ref, source, type, fstart, fend,
score, strand, phase, seqlen, name, organism_id
) AS
SELECT
f.feature_id, sf.name, gffdbx.accession, cv.name,
fl.fmin+1, fl.fmax, af.significance, fl.strand,
fl.phase, f.seqlen, f.name, f.organism_id
FROM feature f
LEFT JOIN featureloc fl ON (f.feature_id = fl.feature_id)
LEFT JOIN feature sf ON (fl.srcfeature_id = sf.feature_id)
LEFT JOIN ( SELECT fd.feature_id, d.accession
FROM feature_dbxref fd
JOIN dbxref d using(dbxref_id)
JOIN db using(db_id)
WHERE db.name = 'GFF_source'
) as gffdbx
ON (f.feature_id=gffdbx.feature_id)
LEFT JOIN cvterm cv ON (f.type_id = cv.cvterm_id)
LEFT JOIN analysisfeature af ON (f.feature_id = af.feature_id);
CREATE OR REPLACE VIEW all_feature_names (
feature_id,
name,
organism_id
) AS
SELECT feature_id,CAST(substring(uniquename from 0 for 255) as varchar(255)) as name,organism_id FROM feature
UNION
SELECT feature_id, name, organism_id FROM feature where name is not null
UNION
SELECT fs.feature_id,s.name,f.organism_id FROM feature_synonym fs, synonym s, feature f
WHERE fs.synonym_id = s.synonym_id AND fs.feature_id = f.feature_id
UNION
SELECT fp.feature_id, CAST(substring(fp.value from 0 for 255) as varchar(255)) as name,f.organism_id FROM featureprop fp, feature f
WHERE f.feature_id = fp.feature_id
UNION
SELECT fd.feature_id, d.accession, f.organism_id FROM feature_dbxref fd, dbxref d,feature f
WHERE fd.dbxref_id = d.dbxref_id AND fd.feature_id = f.feature_id;
|