File: generegion.pl

package info (click to toggle)
libchado-perl 1.23-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 23,976 kB
  • ctags: 10,378
  • sloc: xml: 192,540; sql: 165,945; perl: 28,339; sh: 101; python: 73; makefile: 46
file content (167 lines) | stat: -rwxr-xr-x 4,466 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#!/usr/bin/env perl 

# generegion_soi.pl

# --------------------------------
# sshu@fruitfly.org
# --------------------------------

use lib $ENV{GMOD_MODULE_PATH};
use FindBin qw($RealBin);
use lib (($ENV{SOI_ROOT}) ||
                     (($INC[0]=~/^\./)?"$RealBin/$INC[0]":"$RealBin/.."));

use Carp;
use strict;

use FileHandle;
use SOI::Adapter;
use SOI::FeatureDecor;

use Getopt::Long;

my $argh = {};
GetOptions($argh,
           "dbname|d=s",
           "outfile|o=s",
           "template=s",
           "args|arg=s%",
           "extend|e=s",
           "so_cv_name|so_cv|so=s",
           "dump",
           "display",
           "format|w=s",
           "noresults",
           "help",
          );

if ($argh->{help}) {
    usage();
    exit 0;
}

# set up the database adapter
if (!$argh->{dbname}) {
    print STDERR "You MUST specify a database name; for example -d gadfly3\n\n";
    usage();
    exit 1;
}
my $ad;
eval {
    $ad  =
      SOI::Adapter->new($argh->{dbname});
};
if ($@) {
    print STDERR "There was an error connecting to $argh->dbname}\n\n";
    print STDERR $@;
    exit 1;
}

if ($argh->{so_cv_name}) {
    $ad->SO_cv_name($argh->{so_cv_name});
}

my @cgs = @ARGV;
if ($argh->{template}) {
    my $features = $ad->get_features_by_template($argh->{template}, $argh->{args});
    map{push @cgs, $_->uniquename}@{$features || []};
}

my $out = $argh->{outfile};
if (@cgs >1 && ($out || $argh->{display})) {
    die("can't combine multiple cgs into 1 file or display");
}

my $ext =( $argh->{extend} ) || 5000;
foreach my $cg (@cgs) {
    my ($range,$genes) =
      $ad->get_features_by_gene($cg, {extend=>$ext});
    if (!@{$genes || []}) {
        print "NO SUCH GENE: $cg\n";
        next;
    }
    #get around arm residues stored problem (wastefull & slow)
    my $GBs = $ad->get_f({range=>$range}, {feature_types=>'golden_path_region',noauxillaries=>1});
    my $arm = SOI::Feature->new({type=>'chromosome_arm',name=>$range->{src}});
    $arm->nodes($GBs);
    my ($fmin, $fmax) = ($range->{fmin},$range->{fmax});
    my $new_f = $arm->stitch_child_segments($fmin,$fmax, {name=>"$cg-region-ext$ext"});
    $arm->hash->{residues} = "";
    my $ans;
    unless ($argh->{noresults}) {
        my $rsets = $ad->get_results({range=>$range});
        map{$_->transform($new_f)}(@{$rsets || []});
        $ans = $ad->get_analysis();
        my %an_h;
        map{$an_h{$_->analysis_id}=$_}@{$ans || []};
        foreach my $rset (@{$rsets || []}) {
            my $an = $an_h{$rset->analysis_id};
            $an->add_node($rset) if ($an);
        }
    }
    map{$_->transform($new_f)}@{$genes};
    $arm->nodes([$new_f,@{$genes}, @{$ans || []}]);

    unless ($argh->{display}) {
        my $format = $argh->{format} || 'soi';
        $format =~ s/_xml//;
        my $method = sprintf("to_%s_xml", $format);
        unless ($arm->can($method)) {
            $method = "to_soi_xml";
            $format = "soi";
        }
        $out = ( $argh->{outfile} ) || "$cg.$format.xml"; 
        $arm->$method("$out");
    } else {
        my ($g) = grep{$_->name eq $cg}@{$genes};
        SOI::Visitor->remove_strand($arm,-$g->strand);
        my $d = SOI::FeatureDecor->new($arm);
        my $p = $d->make_panel;
        my $tmpf = "./tmp$cg.gif";
        open(F, ">$tmpf") or die;
        print F $p->gd->png;
        close(F);
        `xv $tmpf`;
        `rm -f $tmpf`;
    }
}


print STDERR "Done!\n";
$ad->close_handle;

sub usage {
    print <<EOM
dump_generegion_soi.pl [-d|dbname <chado DATABASE NAME>] [-extend|e NO OF BASES] [-template <FILE>] [-outfile|o filename to put xml in] [-format <FORMAT>] [-noresults] cg-list

This script will export per-CG xml files. you can specify a list of
CGs on the command line, or you can give it a file of CG ids (just CG
ids, not gene symbols)

If outfile is specified, only a single CG can be exported to the file.

Default format is xml (soi). you can also get chaos xml:

dump_generegion_soi.pl -d chado -format chaos CG6699

you can als get GAME xml:

dump_generegion_soi.pl -d chado -format game CG6699

To get the gene region without evidence:

dump_generegion_soi.pl -d chado -format chaos -noresults CG6699

to use template, here is one example (get genes whose protein has 2 mature_peptides):

dump_generegion_soi.pl -d chado -format chaos -template ../templates/genes_by_child_count.soi -arg ptype=protein -arg ctype=mature_peptide -arg count=2 -arg operator='='


EOM
}