File: populate-match-floc.sql

package info (click to toggle)
libchado-perl 1.22-4
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 24,024 kB
  • sloc: xml: 192,540; sql: 165,936; perl: 28,298; sh: 101; python: 73; makefile: 46
file content (49 lines) | stat: -rw-r--r-- 1,526 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
--
-- the location of a parent match feature is the maximal extent
-- of the featurelocs of the child, on a per-rank basis
--
CREATE TEMPORARY TABLE floc_tmp 
 AS SELECT
        feature_relationship.object_id AS feature_id,
        featureloc.rank                AS rank,
        min(featureloc.fmin) AS fmin,
        max(featureloc.fmax) AS fmax,
        min(featureloc.strand) AS strandmin,
        max(featureloc.strand) AS strandmax
 FROM        feature
  INNER JOIN featureloc           USING (feature_id)
  INNER JOIN feature_relationship ON (featureloc.feature_id=subject_id)
 WHERE feature.type_id IN 
                (SELECT cvterm_id 
                 FROM cvterm
                 WHERE name='match')
       AND locgroup = 0
 GROUP BY feature_relationship.object_id, featureloc.rank;

--
-- how do we treat child features on varying strands?
-- we can either insert consistent-only or insert all

-- OPTION 1
INSERT INTO featureloc
 (feature_id,rank,fmin,fmax,strand)
 SELECT 
  feature_id,rank,fmin,fmax,strandmin
 FROM floc_tmp
 WHERE strandmin = strandmax
 ORDER BY feature_id,rank;

-- OPTION 2
-- just choose strand 1; or should it be strand 0?
INSERT INTO featureloc
 (feature_id,rank,fmin,fmax,strand)
 SELECT 
  feature_id,rank,fmin,fmax,1
 FROM floc_tmp
 ORDER BY feature_id,rank;

-- report inconsistent;
-- perhaps we actually want to include these, with arbitrary strand?
SELECT count(feature_id) FROM floc_tmp WHERE strandmin != strandmax;
SELECT feature_id FROM floc_tmp WHERE strandmin != strandmax;