File: fragment_4dnpairs.pl

package info (click to toggle)
python-pairix 0.3.8-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 179,892 kB
  • sloc: ansic: 5,553; python: 1,207; sh: 602; perl: 464; makefile: 64
file content (134 lines) | stat: -rwxr-xr-x 4,790 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/usr/bin/perl 
##########
#The MIT License (MIT)
#
# Copyright (c) 2015 Aiden Lab
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
#  THE SOFTWARE.
##########

# Perl script to convert to fragment map from infile. The infile should be in
# DCIC format:
# # HEADER 
# readname chr1 pos1 chr2 pos2 str1 str2
#
# The script also requires a restriction site file, which lists on 
# each line, the sorted locations of the enzyme restriction sites.
#
# Usage:  fragment_4dnpairs.pl <infile> <outfile> [site file]\n";

use POSIX;
use Getopt::Long;

$no_replacement=1;
&GetOptions ( 'a|allow-replacement' => sub { $no_replacement=0; });

$site_file = "/opt/juicer/restriction_sites/hg19_DpnII.txt";
# Check arguments
if (scalar(@ARGV) == 2) {
  ($infile,$outfile) = @ARGV;
}
elsif (scalar(@ARGV) == 3) {
  ($infile,$outfile,$site_file) = @ARGV;
}
else {
  print "Usage: fragment_4dnpairs.pl [--allow-replacement] <infile> <outfile> [site file]\n";
  print " <infile>: file in intermediate format to calculate statistics on\n";
  print " <outfile>: output, results of fragment search\n";  
  print " [site file]: list of restriction sites, one line per chromosome (default DpnII hg19)\n";
  print " --allow-replacement : allows replacing existing frag1/frag2 columns. Default: abort if the columns already exist.\n";
  exit;
}
# Global variables for calculating statistics
my %chromosomes;
my %hindIII;

# read in restriction site file and store as multidimensional array
open FILE, $site_file or die $!;

while (<FILE>) {
  my @locs = split;
  my $key = shift(@locs);
  my $ref = \@locs;
  $chromosomes{$key} = $ref;
	if ($key == "14") {
		$chromosomes{$key."m"} = $ref;  
		$chromosomes{$key."p"} = $ref;
	}
}
close(FILE);

# read in infile and calculate statistics
open INFILE, $infile or die $!;
open OUTFILE,">", $outfile or die $!;
$"="\t";
$frag1_colindex=-1;
$frag2_colindex=-1;
while (<INFILE>) {
  chomp;
  if(/^#columns:\s*/) { 
      my @headers = split/\s+/,$_;
      if((/frag1/ || /frag2/) && $no_replacement) { die "frag columns already exist. Aborting..\n"; }
      elsif((/frag1/ || /frag2/) && !$no_replacement) { 
          for my $i (1..$#headers){
              if($headers[$i] eq 'frag1') { $frag1_colindex=$i-1; }
              elsif($headers[$i] eq 'frag2') { $frag2_colindex=$i-1; }
          }
          print STDERR "$frag1_colindex, $frag2_colindex\n";  # Soo
          if($frag1_colindex == -1) { $frag1_colindex=$#headers; print OUTFILE "$_ frag1\n"; next; }
          elsif($frag2_colindex == -1) { $frag2_colindex=$#headers; print OUTFILE "$_ frag2\n"; next; }
          else { print OUTFILE "$_\n"; next; }
      }
      else { $frag1_colindex=$#headers; $frag2_colindex=$#headers+1; print OUTFILE "$_ frag1 frag2\n"; next; }
  }
  elsif(/^#/) { print OUTFILE "$_\n"; next; }
  my @original_record = split;
  my @record = @original_record[5,1,2,6,3,4];

  # find upper index of position in sites array via binary search
  my $index1 = &bsearch($record[2],$chromosomes{$record[1]});
  my $index2 = &bsearch($record[5],$chromosomes{$record[4]});
  $original_record[$frag1_colindex] = $index1;
  $original_record[$frag2_colindex] = $index2;
  print OUTFILE "@original_record\n";
}
close(INFILE);
close(OUTFILE);

# Binary search, array passed by reference
# search array of integers a for given integer x
# return index where found or upper index if not found
sub bsearch {
  my ($x, $a) = @_;            # search for x in array a
  my ($l, $u) = (0, @$a - 1);  # lower, upper end of search interval
  my $i;                       # index of probe
  while ($l <= $u) {
    $i = int(($l + $u)/2);
    if ($a->[$i] < $x) {
      $l = $i+1;
    }
    elsif ($a->[$i] > $x) {
      $u = $i-1;
    } 
    else {
      return $i+1; # found
    }
  }
  return $l;         
}