File: composite_LD.PLS

package info (click to toggle)
bioperl 1.5.2.102-3
  • links: PTS, VCS
  • area: main
  • in suites: lenny
  • size: 26,228 kB
  • ctags: 10,586
  • sloc: perl: 151,969; xml: 5,803; makefile: 44
file content (107 lines) | stat: -rw-r--r-- 2,627 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/perl -w
# -*-Perl-*- 
# $Id: composite_LD.PLS,v 1.8 2003/11/13 00:32:09 jason Exp $

=head1 NAME 

composite_LD -i filename.prettybase.txt --sortbyld E<gt> outfile

=head1 SYNOPSIS

  composite_LD -i filename.prettybase [-o out.LD] [-f prettybase/csv] [--sortbyld] [--noconvertindels]

=head2 DESCRIPTION

This a script which allows an easy way to calculate composite LD.  

=head2 OPTIONS

-i or --in     filename

-f or --format genotype format (prettybase or CSV)

--sortbyld     To see data sorted by LD instead of just all the 
               site1/site2 pair LD values.

-o or --out    output filename, otherwise will print to STDOUT

--noconvert    (applicable for prettybase format file only)
               if specified will NOT attempt to convert indel
               states to 'I' and delete states ('-') to 'D'.

-h or --help   see this documentation

=head2 AUTHOR Jason Stajich, Matthew Hahn

For more information contact:

Matthew Hahn, E<lt>matthew.hahn-at-duke.eduE<gt>
Jason Stajich E<lt>jason-at-bioperl-dot-orgE<gt>


=cut

use strict;

use Bio::PopGen::IO;
use Bio::PopGen::Statistics;
use Bio::PopGen::Population;

use Getopt::Long;

my ($file,$outfile,$sortbyld,$format,$noconvert,$verbose);
$format = 'prettybase'; # default format is prettybase
GetOptions(
	   'i|in:s'       => \$file, # pass the filename as 
	   'o|out:s'      => \$outfile,
	   'f|format:s'   => \$format,
	   'sortbyld'     => \$sortbyld,
	   'noconvert'    => \$noconvert,
	   'v|verbose'    => \$verbose,
	   'h|help'       => sub { system('perldoc', $0);
				   exit; }, 
	   );

if( ! $file ) { 
    $file = shift @ARGV;  # if no -i specified
}

my $io = Bio::PopGen::IO->new(-format => $format,
			      -verbose=> $verbose,
			      -CONVERT_INDEL_STATES => ! $noconvert,
			      -file   => $file);

my $stats = Bio::PopGen::Statistics->new(-verbose => $verbose);
my $pop = $io->next_population;

my %LD_matrix = $stats->composite_LD($pop);

# sites can be ordered by sorting their names

my @sites;
my $out;
if( $outfile ) { 
    open($out, ">$outfile") || die("$outfile: $!");
} else { 
    $out = \*STDOUT;
}
foreach my $site1 ( sort keys %LD_matrix ) {
    foreach my $site2 ( sort keys %{ $LD_matrix{$site1} } ) {
	my $LD = $LD_matrix{$site1}->{$site2}; # LD for site1,site2 
	if( $sortbyld ) {
	    push @sites, [ $site1,$site2,@$LD];
	} else { 
	    printf $out "%s,%s - LD=%.4f chisq=%.4f\n",$site1,$site2,@$LD;
	}
    }
}

if( $sortbyld ) {
    foreach my $s ( sort { $b->[3] <=> $a->[3] } @sites ) {
	my ($site1,$site2,$ld,$chisq) = @$s;
	print $out "$site1,$site2 - LD=$ld, chisq=$chisq\n";
    }
}