File: evec2pca.perl

package info (click to toggle)
eigensoft 7.2.1%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 1,696 kB
  • sloc: ansic: 32,354; perl: 470; makefile: 107; sh: 10
file content (52 lines) | stat: -rwxr-xr-x 1,462 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/usr/bin/perl

### translate .evec file to .pca file expected by eigenstrat program
### Note: .evec file does not contain entries for outliers
###       .pca  file does contain entries (set to all 0.0) for outliers

$k = $ARGV[0];
$evec = $ARGV[1]; 
$ind = $ARGV[2];  
$pca = $ARGV[3];  

open(EVEC,$evec) || die("OOPS couldn't open file $evec for reading");
open(IND,$ind) || die("OOPS couldn't open indiv file $ind for reading");
open(PCA,">$pca") || die("OOPS couldn't open file $pca for writing");

print PCA ("$k\n"); # number of output eigenvectors/eigenvalues
$line = <EVEC>; chomp($line); # eigvals line
my @array = split(/[\t ]+/,$line);
for($x=0; $x<$k; $x++) { printf PCA ("%.04f\n",$array[$x+2]); } # x-th eval
while($line = <EVEC>)
{
  chomp($line);
  $line = " " . $line;
  my @array = split(/[\t ]+/,$line);
  $l = @array; 
  unless($l == 3+$k) { die("OOPS #evec in $evec is different from $k"); }
  $sample = $array[1];
  for($x=0; $x<$k; $x++) { $evecarray{$sample}[$x] = $array[$x+2]; }
  $found{$sample} = 1;
}

while($line = <IND>)
{
  chomp($line);
  my @array = split(/[\t ]+/,$line);
  $sample = $array[0];
  if($sample eq "") { $sample = $array[1]; }
  unless($found{$sample})
  {
    for($x=0; $x<$k; $x++) { $evecarray{$sample}[$x] = 0.0; }
  }
  for($x=0; $x<$k; $x++)
  {
    printf PCA (" ");
    if($evecarray{$sample}[$x] > 0) { printf PCA (" "); }
    printf PCA ("%.04f",$evecarray{$sample}[$x]);
  }
  printf PCA ("\n");
}