File: partit2sga.pl

package info (click to toggle)
chip-seq 1.5.5-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, sid, trixie
  • size: 21,320 kB
  • sloc: ansic: 10,053; perl: 1,493; makefile: 103; sh: 91
file content (95 lines) | stat: -rwxr-xr-x 2,603 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/perl
#
# reformats partitioning output files (SGA) into bed format
# usage: partit2bed.pl [-t track_name -d desc -n chr_nb -b chr_start -e -chr_end] <ChIP-part.output>

use strict;
use Getopt::Long;
use Storable qw (retrieve nstore);         # package to store persistently variables in files [http://search.cpan.org/author/AMS/Storable-2.07/Storable.pm]

my %opt;
my @options = ("help", "h", "db=s");

my $DB = "/usr/share/chip-seq/";
#my $DB = "/db/genome/";

if( ! GetOptions( \%opt, @options ) ) { &Usage(); }
&Usage() if defined($opt{'help'}) || defined($opt{'h'});

#define options
if ($opt{'db'} ne '') {
    $DB = $opt{'db'};
}

open FH, $DB."chro_idx.nstorage" or die "Wrong Chrom Id Storable file $DB.\"chro_idx.nstorage\": $!";
my $chroac = retrieve($DB."chro_idx.nstorage");

&Usage() if $#ARGV < 0;

my $file = $ARGV[0];

# Chromosome size table
my %chr_size = ();

my @pos = ();

# Read First line of Partition File
#
# Get chrom sizes
my $assembly = "";
# Read Firstline and get assembly from chrom identifier
open(PART, "<$file") || die "can't read $file : $!";
my $firstline = <PART>;
chomp $firstline;
my @field=split(/\t/,$firstline);
if (defined($field[0])){
  $assembly = $chroac->{'assembly'}->{$field[0]};
}
if ($assembly ne "") {
  open (CHR, "/home/local/db/genome/$assembly/$assembly".".chrom.sizes") || die "can't open /home/local/db/genome/$assembly/$assembly'.'.chrom.sizes' : $!";
  while (<CHR>) {
    chomp;
    my @f=split(/\t/,$_); 
    $chr_size{$f[0]} = $f[1];
  }
  close (CHR);
}
seek (PART, 0, 0);
while (<PART>){
# Partitioning algorithm reports positions of start and end of tag, 
#NC_000001.9	CTCF	227595	+	1  Start Line
#NC_000001.9	CTCF	227633	-	1  End Line
#Generate centered SGA file
  my $lin=$_;
  chomp $lin;
  my @field=split(/\t/,$lin);
  if (defined($field[0])){    
    if ($field[3] eq '+' || $field[3] eq '0') {
      $pos[0] = $field[2];
    } elsif ($field[3] eq '-') {
      $pos[1] = $field[2];
      my $center = int(($pos[0] + $pos[1])/2); 
      next if (defined($chr_size{$field[0]}) && $center > $chr_size{$field[0]});
      print $field[0],"\t",$field[1],"\t",$center,"\t"."0"."\t$field[4]\n";
      @pos = ();
    }
  }
}
close (PART);

sub Usage {
    print STDERR <<"_USAGE_";

    partit2sga.pl [options] <ChIP-Part out file> 

    where options are:
        -h|--help                Show this stuff
        --db <path>              Use <path> to locate Chrom Id Storable File 'chro_idx.nstorage'

    The program converts the output of the ChIP-Seq partitioning program to centered SGA format.

_USAGE_
    exit(1);
}

1;