File: sequence-dbapi.sqli

package info (click to toggle)
libchado-perl 1.23-2
links: PTS, VCS
area: main
in suites: jessie, jessie-kfreebsd
size: 23,976 kB
ctags: 10,378
sloc: xml: 192,540; sql: 165,945; perl: 28,339; sh: 101; python: 73; makefile: 46
file content (530 lines) | stat: -rw-r--r-- 25,279 bytes
parent folder | download | duplicates (5)
-- $Id: sequence-dbapi.sqli,v 1.2 2005-03-15 21:28:15 sshu Exp $
-- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-- sequence-dbapi.sqli
--
--  STATUS: alpha
--
--  this interface describes the functions implemented by
--  a chado database over the sequence module
--
-- [at this time, this API specifies functions some of which do not
--  yet have an implementation - a default impl will be
--  provided soon]
--
-- the interface is specified in pseudo-SQL function syntax
-- it is intended as formal documentation for DB Admins and
-- application programmers. It is not intended to be used
-- directly by the DB. The DB should implement these functions
-- using a language appropriate to the DBMS, implementing the
-- policies chosen by the MOD and DB Admin.
--
-- a default postgresql implementation will be provided, in
-- the functions/ directory. hopefully it should not be difficult
-- to port these to other DBMS systems
--
-- the DB API contains granular 'atomic' functions; that is,
-- functions that neither accept nor return complex datatypes
-- such as objects, XML or other data structures.
-- The API accepts/returns primitive values and relations.
-- As such, the DB API is perhaps mostly useful for applications that
-- modify the database. The API is intended to be complementary
-- to APIs that accept or return complex datatypes, such as ChadoXML
--
-- CONVENTIONS:
--   functions are generally named <verb>_<noun_phrase>
--   the noun phrase typically refers to a chado table name,
--   a type in some ontology such as SO, or an emergent
--   table/type, such as "gene model"
--
-- TODO: Document possible exceptions raised
-- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

-- ************************************************************
-- ** Data Modification Functions                            **
-- **                                                        **
-- ** Calling any of these functions can result in data      **
-- ** being modified                                         **
-- **                                                        **
-- ** Each function should note in the comments which        **
-- ** tables are affected                                    **
-- **  [d] - may result in deletion                          **
-- **  [u] - may result in update                            **
-- **  [i] - may result in insertion                         **
-- ** Cascading deletes are not explicitly noted;            **
-- ** (ie if table 'feature' can be deleted then             **
-- **  'featureloc' may be deleted as a consequence)         **
-- ************************************************************

-- ============================================================
-- FUNCTION: delete_gene_model
-- ============================================================
-- Deletes a gene model. Two overloads are declared: the gene is
-- identified either by gene_feature_id, or by the pair
-- (uniquename, organism_id).
--
-- Different chado instances may have different deletion policies:
--  + simple policy: blow away gene and subfeatures
--  + integrated-db policy: remove exons and all featurelocs
--      (preserves data attached to features)
--  + undecided: additional tracking? obsoletion property?
-- TABLES AFFECTED: feature[d]
DECLARE FUNCTION delete_gene_model(
    gene_feature_id  integer NOT NULL
);
DECLARE FUNCTION delete_gene_model(
    uniquename   varchar NOT NULL,
    organism_id  integer NOT NULL
);

-- ============================================================
-- FUNCTION: delete_transcript_model
-- ============================================================
-- Deletes a transcript model. Two overloads are declared: the
-- transcript is identified either by transcript_feature_id, or by
-- the pair (uniquename, organism_id).
--  + for the deletion policies, see delete_gene_model
-- TABLES AFFECTED: feature[d]
DECLARE FUNCTION delete_transcript_model(
    transcript_feature_id  integer NOT NULL
);
DECLARE FUNCTION delete_transcript_model(
    uniquename   varchar NOT NULL,
    organism_id  integer NOT NULL
);

-- ============================================================
-- FUNCTION: remove_exon_from_transcript
-- ============================================================
-- Removes an exon from a transcript. The exon is specified by
-- its exon_fmin / exon_fmax coordinates.
--  + deletes feature_relationship
--  + will delete feature if this is the only exon on this transcript
-- Returns remaining_transcript_exon_count.
-- TABLES AFFECTED: feature[d] feature_relationship[d]
DECLARE FUNCTION remove_exon_from_transcript(
    transcript_feature_id  integer NOT NULL,
    exon_fmin              integer NOT NULL,
    exon_fmax              integer NOT NULL
)
    RETURNS remaining_transcript_exon_count integer NOT NULL;

-- ============================================================
-- FUNCTION: add_exon_to_transcript
-- ============================================================
-- Adds an exon, given by its coordinates and strand, to a transcript.
--  + adds feature_relationship
--  + will create exon feature if this is the first exon on this transcript
--  + will generate a name for the new exon if required
-- NOTE(review): the return value is named
--   remaining_transcript_exon_count, as in remove_exon_from_transcript;
--   presumably it is the transcript's exon count after the
--   addition - TODO confirm
-- TABLES AFFECTED: feature[i] featureloc[i] feature_relationship[i]
DECLARE FUNCTION add_exon_to_transcript(
    transcript_feature_id  integer NOT NULL,
    exon_fmin              integer NOT NULL,
    exon_fmax              integer NOT NULL,
    exon_strand            integer NOT NULL
)
    RETURNS remaining_transcript_exon_count integer NOT NULL;

-- ============================================================
-- FUNCTION: change_exonloc_for_transcript
-- ============================================================
-- Changes the coordinates of an exon within a transcript, from
-- (exon_fmin_old, exon_fmax_old) to (exon_fmin_new, exon_fmax_new).
--  + coordinates are relative to genomic
--  + if a transcript has >1 exon, this will split the exon
--    UNLESS the new coordinates match an existing exon
--  + will generate a name for the new exon if required
-- Returns remaining_transcript_exon_count.
-- TABLES AFFECTED: feature[idu] featureloc[idu] feature_relationship[id]
DECLARE FUNCTION change_exonloc_for_transcript(
    transcript_feature_id  integer NOT NULL,
    exon_fmin_old          integer NOT NULL,
    exon_fmax_old          integer NOT NULL,
    exon_fmin_new          integer NOT NULL,
    exon_fmax_new          integer NOT NULL
)
    RETURNS remaining_transcript_exon_count integer NOT NULL;

-- ============================================================
-- FUNCTION: set_feature_residues
-- ============================================================
-- Convenience method for setting feature.residues
-- [should this be handled by triggers?]
--  + also sets feature.seqlen
--  + also sets feature.md5checksum
--
-- residues may be NULL (it carries no NOT NULL constraint), but
-- setting residues to NULL will *NOT* clear seqlen or md5checksum
--
-- Open question: is this too onerous for the DB to implement?
-- The MD5 calculation will require using the pg foreign language
-- interface
-- TABLES AFFECTED: feature[u]
DECLARE FUNCTION set_feature_residues(
    feature_id  integer NOT NULL,
    residues    text
);

-- ============================================================
-- FUNCTION: new_feature
-- ============================================================
-- Inserts a new feature and returns its feature_id.
--  + ftype           - feature type (SO cv assumed)
--  + root_feature_id - root of the feature graph (eg gene)
--  + if the feature does not have a name or uniquename, this
--    will be generated based on the type and root_feature
--    (root_feature_id and uniquename cannot BOTH be null)
--    [this allows centralization of naming policy]
--  + organism_id MUST be set UNLESS set_default_organism has been called
--    NOTE(review): this signature declares no organism_id
--    parameter - confirm how the organism is meant to be supplied
-- TABLES AFFECTED: feature[i] dbxref[iu]
DECLARE FUNCTION new_feature(
    name             varchar,
    uniquename       varchar,
    dbxref_id        integer,
    ftype            varchar NOT NULL,
    residues         text,
    root_feature_id  integer,
    rank_by_root     integer NOT NULL
)
    RETURNS feature_id integer NOT NULL;

-- ============================================================
-- FUNCTION: new_basic_analysis_feature
-- ============================================================
-- Inserts a new analysis feature and returns its feature_id.
--  + adds a featureloc for convenience
--  + basic analysis features typically have zero or one parent
--  + ftype defaults to 'match'
-- TABLES AFFECTED: feature[i] dbxref[iu] featureloc[i]
--                  feature_relationship[id] analysisfeature[i]
DECLARE FUNCTION new_basic_analysis_feature(
    srcfeature_name    varchar NOT NULL,
    program            varchar NOT NULL,
    sourcename         varchar,
    organism_id        integer,
    ftype              varchar DEFAULT 'match',
    fmin               integer NOT NULL,
    fmax               integer NOT NULL,
    strand             integer NOT NULL,
    score              float,
    frame              integer,
    parent_feature_id  integer,
    rank_by_root       integer
)
    RETURNS feature_id integer NOT NULL;

-- ============================================================
-- FUNCTION: new_basic_match_feature
-- ============================================================
-- Inserts a new analysis (match) feature and returns its feature_id.
--  + adds BOTH featurelocs for convenience
--    NOTE(review): presumably (fmin, fmax) and
--    (target_fmin, target_fmax) are the two locations - TODO confirm
--  + basic match features typically have zero or one parent
--    (zero if Hit, 1 if HSP)
-- TABLES AFFECTED: feature[i] dbxref[iu] featureloc[i]
--                  feature_relationship[id] analysisfeature[i]
DECLARE FUNCTION new_basic_match_feature(
    srcfeature_name    varchar NOT NULL,
    program            varchar NOT NULL,
    sourcename         varchar,
    organism_id        integer,
    fmin               integer NOT NULL,
    fmax               integer NOT NULL,
    strand             integer NOT NULL,
    target_fmin        integer NOT NULL,
    target_fmax        integer NOT NULL,
    score              float,
    frame              integer,
    cigar              varchar,
    parent_feature_id  integer,
    rank_by_root       integer
)
    RETURNS feature_id integer NOT NULL;

-- ************************************************************
-- ** Non-modifying Functions                                **
-- ************************************************************
-- The functions below have no side-effects (ie they never
-- result in update/delete/insert of any data)

-- ============================================================
-- FUNCTION: synthesize_feature_uniquename
-- ============================================================
-- Synthesizes the uniquename of a feature using the DB-specific
-- naming policy.
-- + ftype           - feature type (SO cv assumed)
-- + root_feature_id - root of the feature graph (eg gene)
-- + rank_by_root    - eg exon number within gene
DECLARE FUNCTION synthesize_feature_uniquename(
    ftype            varchar NOT NULL,
    root_feature_id  integer NOT NULL,
    rank_by_root     integer NOT NULL
)
    RETURNS uniquename varchar NOT NULL;

-- ============================================================
-- FUNCTION: get_sub_feature_ids
-- ============================================================
-- Returns the feature_ids of a feature's children, and of their
-- children, all the way down to the leaf nodes.
-- + root_feature_id - root of the feature graph (eg gene)
-- impl: see functions/feature_ids_fx.plpgsql
DECLARE FUNCTION get_sub_feature_ids(
    root_feature_id  integer NOT NULL
)
    RETURNS SETOF feature_id;

-- ============================================================
-- FUNCTION: get_sub_feature_ids
-- ============================================================
-- Returns the feature_ids of a feature's children, and of their
-- children, all the way down to the leaf nodes, together with
-- the depth of each.
-- + root_feature_id - root of the feature graph (eg gene)
-- + depth - graph depth (eg 1)
-- impl: see functions/feature_ids_fx.plpgsql
DECLARE FUNCTION get_sub_feature_ids(
    root_feature_id  integer NOT NULL,
    depth            integer NOT NULL
)
    RETURNS SETOF feature_id, depth;

-- ============================================================
-- FUNCTION: get_sub_feature_ids
-- ============================================================
-- Returns the feature_ids of the children, and of their children,
-- all the way down to the leaf nodes, for the feature(s) selected
-- by the sql argument.
-- NOTE: sql must be like 'select distinct feature_id from ...'
-- + sql - to get root_feature_id of the feature graph (eg gene)
-- impl: see functions/feature_ids_fx.plpgsql
DECLARE FUNCTION get_sub_feature_ids(
    sql  text NOT NULL
)
    RETURNS SETOF feature_id;

-- ============================================================
-- FUNCTION: get_up_feature_ids
-- ============================================================
-- Returns the feature_ids of a feature's parents, and of their
-- parents, all the way up to the root.
-- + leaf_feature_id - leaf of the feature graph (e.g. exon)
-- impl: see functions/feature_ids_fx.plpgsql
DECLARE FUNCTION get_up_feature_ids(
    leaf_feature_id  integer NOT NULL
)
    RETURNS SETOF feature_id;

-- ============================================================
-- FUNCTION: get_up_feature_ids
-- ============================================================
-- Returns the feature_ids of a feature's parents, and of their
-- parents, all the way up to the root, with a depth value
-- (in reverse order: the leaf node has the smallest depth).
-- + leaf_feature_id - leaf of the feature graph (e.g. exon)
-- + depth - reversed graph depth
-- impl: see functions/feature_ids_fx.plpgsql
DECLARE FUNCTION get_up_feature_ids(
    leaf_feature_id  integer NOT NULL,
    depth            integer NOT NULL
)
    RETURNS SETOF feature_id, depth;

-- ============================================================
-- FUNCTION: get_up_feature_ids
-- ============================================================
-- Returns the feature_ids of the parents, and of their parents,
-- all the way up to the root, for the feature(s) selected by the
-- sql argument.
-- NOTE: sql must be like 'select distinct feature_id from ...'
-- + sql - to get leaf_feature_id of the feature graph (eg exon)
-- impl: see functions/feature_ids_fx.plpgsql
DECLARE FUNCTION get_up_feature_ids(
    sql  text NOT NULL
)
    RETURNS SETOF feature_id;

-- ============================================================
-- FUNCTION: get_feature_ids
-- ============================================================
-- Returns all feature ids of the feature graph that the given
-- feature_id belongs to.
-- + feature_id - feature id of the feature graph (e.g. mRNA)
-- impl: see functions/feature_ids_fx.plpgsql
DECLARE FUNCTION get_feature_ids(
    feature_id  integer NOT NULL
)
    RETURNS SETOF feature_id;

-- ============================================================
-- FUNCTION: get_feature_ids
-- ============================================================
-- Returns all feature ids of the feature graph that the
-- feature_id selected by the sql argument belongs to.
-- NOTE: sql must be like 'select distinct feature_id from ...'
-- + sql - get feature_id of a feature graph
-- impl: see functions/feature_ids_fx.plpgsql
DECLARE FUNCTION get_feature_ids(
    sql  text NOT NULL
)
    RETURNS SETOF feature_id;

-- ============================================================
-- FUNCTION: get_sub_feature_ids_by_type_src
-- ============================================================
-- Returns the feature_ids of the children, and of their children,
-- all the way down to the leaf nodes, of the features specified
-- by feature type and by the name of their src feature.
-- + feature_type
-- + src - feature name that returned features are located on
-- + is_analysis
-- impl: see functions/feature_ids_fx.plpgsql
DECLARE FUNCTION get_sub_feature_ids_by_type_src(
    feature_type  varchar NOT NULL,
    src           varchar NOT NULL,
    is_analysis   char(1) NOT NULL
)
    RETURNS SETOF feature_id;

-- ============================================================
-- FUNCTION: get_feature_ids_by_type
-- ============================================================
-- Returns all feature ids of the feature graph, some of which
-- are of the given feature type and is_analysis value.
-- + feature_type - SO term
-- + is_analysis
-- impl: see functions/feature_ids_fx.plpgsql
DECLARE FUNCTION get_feature_ids_by_type(
    feature_type  varchar NOT NULL,
    is_analysis   char(1) NOT NULL
)
    RETURNS SETOF feature_id;

-- ============================================================
-- FUNCTION: get_feature_ids_by_type_src
-- ============================================================
-- Returns all feature ids of the feature graph, some of which
-- are of the given feature type and is_analysis value.
-- + feature_type - SO term
-- + src - feature name that returned features are located on
-- + is_analysis
-- impl: see functions/feature_ids_fx.plpgsql
DECLARE FUNCTION get_feature_ids_by_type_src(
    feature_type  varchar NOT NULL,
    src           varchar NOT NULL,
    is_analysis   char(1) NOT NULL
)
    RETURNS SETOF feature_id;

-- ============================================================
-- FUNCTION: get_feature_ids_by_type_name
-- ============================================================
-- Returns all feature ids of the feature graph, some of which
-- are of the specified type and have the given feature name.
-- + feature_type - SO term
-- + feature_name - feature name, sql wild card (%) is allowed
-- + is_analysis
-- impl: see functions/feature_ids_fx.plpgsql
DECLARE FUNCTION get_feature_ids_by_type_name(
    feature_type  varchar NOT NULL,
    feature_name  varchar NOT NULL,
    is_analysis   char(1) NOT NULL
)
    RETURNS SETOF feature_id;

-- ============================================================
-- FUNCTION: get_feature_ids_by_ont
-- ============================================================
-- Returns all feature ids of the feature graph, some of which
-- have an assignment of the cvterm of the aspect.
-- + aspect - cv.name
-- + cvterm_name
-- impl: see functions/feature_ids_fx.plpgsql
DECLARE FUNCTION get_feature_ids_by_ont(
    aspect       varchar NOT NULL,
    cvterm_name  varchar NOT NULL
)
    RETURNS SETOF feature_id;

-- ============================================================
-- FUNCTION: get_feature_ids_by_ont_root
-- ============================================================
-- Returns all feature ids of the feature graph, some of which
-- have an assignment of the cvterm of the aspect, or of cvterms
-- that are sub nodes of that cvterm.
-- + aspect - cv.name
-- + cvterm_name
-- impl: see functions/feature_ids_fx.plpgsql
DECLARE FUNCTION get_feature_ids_by_ont_root(
    aspect       varchar NOT NULL,
    cvterm_name  varchar NOT NULL
)
    RETURNS SETOF feature_id;

-- ============================================================
-- FUNCTION: get_feature_ids_by_propval
-- ============================================================
-- Returns all feature ids of the feature graph, some of which
-- have the given property value.
-- + property_val
-- impl: see functions/feature_ids_fx.plpgsql
DECLARE FUNCTION get_feature_ids_by_propval(
    property_val  varchar NOT NULL
)
    RETURNS SETOF feature_id;

-- ============================================================
-- FUNCTION: get_feature_ids_by_child_count
-- ============================================================
-- Returns all feature ids of the feature graph, some of which
-- are of the feature type parent_feature_type and have the
-- specified number of child features of the feature type
-- child_feature_type.
-- + parent_feature_type - SO term
-- + child_feature_type - SO term
-- + number
-- + operator - sql comparison operator string (eg >=, =, <, etc)
-- + is_analysis
-- impl: see functions/feature_ids_fx.plpgsql
DECLARE FUNCTION get_feature_ids_by_child_count(
    parent_feature_type  varchar NOT NULL,
    child_feature_type   varchar NOT NULL,
    number               integer NOT NULL,
    operator             varchar NOT NULL,
    is_analysis          char(1) NOT NULL
)
    RETURNS SETOF feature_id;


-- ************************************************************
-- ** Configuration Management                               **
-- ************************************************************

-- ============================================================
-- FUNCTION: set_default_organism_id
-- ============================================================
-- In some chado instances it is desirable to have a default
-- organism; when calling the methods above, the organism column
-- may be left blank, in which case the default is used
-- [NOTE: THIS SHOULD BE MOVED TO ORGANISM MODULE]
-- this will update a 'private' table (in separate postgresql schemaspace?)
DECLARE FUNCTION set_default_organism_id(
    organism_id  integer NOT NULL
);

-- ============================================================
-- FUNCTION: set_default_organism
-- ============================================================
-- Sets the default organism, specified by genus and species,
-- and returns its organism_id.
-- [NOTE: THIS SHOULD BE MOVED TO ORGANISM MODULE]
-- will NOT create organism if not present
DECLARE FUNCTION set_default_organism(
    genus    varchar NOT NULL,
    species  varchar NOT NULL
)
    RETURNS organism_id integer NOT NULL;


-- ************************************************************
-- Mereological spatial relations
--
-- This is a declaration of relations which may be implemented
-- as either views or tables (typically only the latter in
-- a denormalized warehouse DB)
--
-- a RELATION declaration is stating that either a TABLE or VIEW
-- exists. The relation is not necessarily updateable (ie it
-- may be a view)
--
-- default VIEW implementations will be provided;
--  DB Admins may wish to materialize these views (eg in a report db)
-- ************************************************************

-- Each relation below carries a flag pair such as (ST) or (S!T).
-- NOTE(review): presumably S = symmetric, T = transitive and
--   '!' = negation - TODO confirm

-- identical strand (ST)
DECLARE RELATION feature_on_same_strand_as_feature(
    subject_id,
    object_id,
    overlaplen
);

-- at least 1bp of overlap (S!T)
DECLARE RELATION feature_overlaps_feature(
    subject_id,
    object_id,
    overlaplen
);

-- overlaps AND on same strand (S!T)
DECLARE RELATION feature_overlaps_feature_on_same_strand(
    subject_id,
    object_id,
    overlaplen
);

-- feature overlaps and completely contains the other feature (!ST)
DECLARE RELATION feature_contains_feature(
    subject_id,
    object_id
);

-- inverse of feature_contains_feature (!ST)
DECLARE RELATION feature_contained_by_feature(
    subject_id,
    object_id
);

-- identity (ST)
DECLARE RELATION feature_same_loc_as_feature(
    subject_id,
    object_id
);

-- edges touch but no overlap; distance=0 (S!T)
DECLARE RELATION feature_adjacent_to_feature(
    subject_id,
    object_id
);

-- edges touch, subj upstream of obj (!S!T)
DECLARE RELATION feature_adjacent_upstream_of_feature(
    subject_id,
    object_id
);

-- edges touch, subj downstream of obj (!S!T)
DECLARE RELATION feature_adjacent_downstream_of_feature(
    subject_id,
    object_id
);

-- opposite of overlap (S!T)
DECLARE RELATION feature_disjoint_from_feature(
    subject_id,
    object_id,
    distance
);

-- upstream and no overlap (!ST)
DECLARE RELATION feature_upstream_of_feature(
    subject_id,
    object_id,
    distance
);

-- downstream and no overlap (!ST)
DECLARE RELATION feature_downstream_of_feature(
    subject_id,
    object_id,
    distance
);

-- distance between features, -ve number indicates degree of overlap
DECLARE RELATION feature_distance(
    subject_id,
    object_id,
    distance
);

-- part_of (!ST)
DECLARE RELATION feature_part_of_feature(
    subject_id,
    object_id
);