1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
|
#!/usr/bin/perl
#
# reformats partitioning output files (SGA) into bed format
# usage: partit2bed.pl [-t track_name -d desc -n chr_nb -b chr_start -e -chr_end] <ChIP-part.output>
use strict;
use Getopt::Long;
use Storable qw (retrieve nstore); # package to store persistently variables in files [http://search.cpan.org/author/AMS/Storable-2.07/Storable.pm]
my %opt;
my @options = ("help", "h", "db=s");
my $DB = "/usr/share/chip-seq/";
#my $DB = "/db/genome/";
if( ! GetOptions( \%opt, @options ) ) { &Usage(); }
&Usage() if defined($opt{'help'}) || defined($opt{'h'});
#define options
if ($opt{'db'} ne '') {
$DB = $opt{'db'};
}
open FH, $DB."chro_idx.nstorage" or die "Wrong Chrom Id Storable file $DB.\"chro_idx.nstorage\": $!";
my $chroac = retrieve($DB."chro_idx.nstorage");
&Usage() if $#ARGV < 0;
my $file = $ARGV[0];
# Chromosome size table
my %chr_size = ();
my @pos = ();
# Read First line of Partition File
#
# Get chrom sizes
my $assembly = "";
# Read Firstline and get assembly from chrom identifier
open(PART, "<$file") || die "can't read $file : $!";
my $firstline = <PART>;
chomp $firstline;
my @field=split(/\t/,$firstline);
if (defined($field[0])){
$assembly = $chroac->{'assembly'}->{$field[0]};
}
if ($assembly ne "") {
open (CHR, "/home/local/db/genome/$assembly/$assembly".".chrom.sizes") || die "can't open /home/local/db/genome/$assembly/$assembly'.'.chrom.sizes' : $!";
while (<CHR>) {
chomp;
my @f=split(/\t/,$_);
$chr_size{$f[0]} = $f[1];
}
close (CHR);
}
seek (PART, 0, 0);
while (<PART>){
# Partitioning algorithm reports positions of start and end of tag,
#NC_000001.9 CTCF 227595 + 1 Start Line
#NC_000001.9 CTCF 227633 - 1 End Line
#Generate centered SGA file
my $lin=$_;
chomp $lin;
my @field=split(/\t/,$lin);
if (defined($field[0])){
if ($field[3] eq '+' || $field[3] eq '0') {
$pos[0] = $field[2];
} elsif ($field[3] eq '-') {
$pos[1] = $field[2];
my $center = int(($pos[0] + $pos[1])/2);
next if (defined($chr_size{$field[0]}) && $center > $chr_size{$field[0]});
print $field[0],"\t",$field[1],"\t",$center,"\t"."0"."\t$field[4]\n";
@pos = ();
}
}
}
close (PART);
sub Usage {
print STDERR <<"_USAGE_";
partit2sga.pl [options] <ChIP-Part out file>
where options are:
-h|--help Show this stuff
--db <path> Use <path> to locate Chrom Id Storable File 'chro_idx.nstorage'
The program converts the output of the ChIP-Seq partitioning program to centered SGA format.
_USAGE_
exit(1);
}
1;
|