File: t_soi_parse_intersect

package info (click to toggle)
libchado-perl 1.23-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 23,976 kB
  • ctags: 10,378
  • sloc: xml: 192,540; sql: 165,945; perl: 28,339; sh: 101; python: 73; makefile: 46
file content (111 lines) | stat: -rw-r--r-- 3,833 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/env perl

use lib "../";
BEGIN {
    # to handle systems with no installed Test module
    # we include the t dir (where a copy of Test.pm is located)
    # as a fallback
    eval { require Test; };
    use Test;
    plan tests => 11;
}

use strict;
use warnings;
use XML::Parser::PerlSAX;
use SOI::SOIHandler;
use IO;
use SOI::IntersectGraph;
use SOI::Visitor;

#note: data/AE003790.cDNA.game.xml can be loaded into Apollo to do visual validation this test result
my $handler = SOI::SOIHandler->new([qw(chromosome_arm contig transposable_element gene mRNA tRNA exon protein companalysis match match_part)]);
my $parser = XML::Parser::PerlSAX->new(Handler=>$handler);
$parser->parse(Source => { SystemId =>"data/AE003790.soi.xml"});
my $feature = $handler->feature;

my $ig = SOI::IntersectGraph->new;

my (@genes, @trs, @te, @trna, @cdnas);
map{
    my $f = $_;
    if ($f->type eq 'gene') {
        push @genes, $f;
        map{
            if ($_->type eq 'mRNA') {
                push @trs, $_;
            }
            elsif ($_->type eq 'tRNA') {
                push @trna, $_;
            }
        }@{$f->nodes || []};
    }
    elsif ($f->type eq 'transposable_element') {
        push @te, $f;
    }
    elsif ($f->type eq 'companalysis' && $f->sourcename && $f->sourcename eq 'na_cDNA.dros') {
        push @cdnas, @{$f->nodes || []};
    }
    else {
        ;
    }
}@{$feature->nodes || []};

map{$_->set_depth(0)}(@trs, @cdnas);
map{SOI::Visitor->set_loc($_)}@cdnas;
$ig->find_intersects(\@trs,\@cdnas,{query_type=>'exon',subject_type=>'match_part',same_strand=>1,depth=>1,threshold=>0.95});
my %tr_w_cdna;
my %cdna_w_tr;
my %gene_w_cdna;
foreach my $tr (@trs) {
    my $cdnas = $ig->get_ilist($tr);
    next unless (@{$cdnas || []});
    map{$cdna_w_tr{$_->secondary_node->src_seq}++}@{$cdnas || []};
    $tr_w_cdna{$tr->uniquename}++;
}
foreach my $g (@genes) {
    if (grep{exists $tr_w_cdna{$_->uniquename}}@{$g->nodes || []}) {
        $gene_w_cdna{$g->uniquename}++;
    }
}
ok(scalar(@te), 3);
ok(scalar(@trna), 4);
ok(grep{$_ ne 'LD21171' && $_ ne 'GH14660' && $_ ne 'RE35072'}keys %cdna_w_tr, 1);
ok(scalar(keys %cdna_w_tr), 37);
ok(scalar(@{$ig->query_overlaps || {}}), 43);

$ig->find_intersects(\@trs,\@cdnas,{query_type=>'exon',subject_type=>'match_part',same_strand=>1,depth=>1,threshold=>0.90});
%tr_w_cdna = ();
%cdna_w_tr = ();
%gene_w_cdna = ();
foreach my $tr (@trs) {
    my $cdnas = $ig->get_ilist($tr);
    next unless (@{$cdnas || []});
    map{$cdna_w_tr{$_->secondary_node->src_seq}++}@{$cdnas || []};
    $tr_w_cdna{$tr->uniquename}++;
}
foreach my $g (@genes) {
    if (grep{exists $tr_w_cdna{$_->uniquename}}@{$g->nodes || []}) {
        $gene_w_cdna{$g->uniquename}++;
    }
}
ok(grep{$_ ne 'LD21171' && $_ ne 'GH14660' && $_ ne 'RE35072'}keys %cdna_w_tr, 0); #RE35072 now ov
ok(scalar(keys %cdna_w_tr), 38);
ok(scalar(@{$ig->query_overlaps || []}), 50);

#printf "number of cDNA intersect w tr: %d\n", scalar(keys %cdna_w_tr);
#printf "number of genes whose tr intersect w cDNA: %d\n",scalar(keys %gene_w_cdna);
#printf "number of tr intersect w cDNA: %d\n", scalar(keys %tr_w_cdna);
#printf "number of genes: %d\n",scalar(@genes);
#printf "number of TE: %d\n", scalar(@te);
#printf "number of tRNA: %d\n", scalar(@trna);
#map{
#    printf "%s: %s\n",$_->secondary_nodes->[0]->src_seq,join(",",map{$_->name}@{$ig->get_ilist($_) || []});
#}@cdnas;

$ig->find_intersects(\@cdnas,\@trs,{query_type=>'match_part',subject_type=>'exon',same_strand=>1,depth=>1,threshold=>0.98});
ok(scalar(@{$ig->subject_overlaps || []}), 38);
ok((grep{$_->name eq 'BcDNA:LD08743-RA' || $_->name eq 'BcDNA:LD08743-RD'}@{$ig->subject_overlaps || []}), 0);
ok((grep{$_->name eq 'BcDNA:LD08743-RB'}@{$ig->subject_overlaps || []}),1);

#printf "%s\n",join("\n",map{$_->name}@{$ig->subject_overlaps});