File: paf2mhap.pl

package info (click to toggle)
miniasm 0.3%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 632 kB
  • sloc: ansic: 2,528; perl: 120; sh: 66; makefile: 29
file content (35 lines) | stat: -rwxr-xr-x 870 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/usr/bin/env perl

use strict;
use warnings;
use Getopt::Std;

my %opts = ();
getopts("p", \%opts);
my $is_100 = defined($opts{p});

die("Usage: paf2mhap.pl [-p] <in.fa> <in.paf>\n") if (@ARGV == 0);

warn("Parsing FASTA to create the name<=>id table...\n");
my %hash;
my $fn = shift(@ARGV);
open(FH, $fn =~ /\.gz$/? "gzip -dc {} |" : $fn) || die;
my $cnt = 0;
while (<FH>) {
	if (/^>(\S+)/) {
		$hash{$1} = ++$cnt unless defined($hash{$1});
	}
}
close(FH);

warn("Converting PAF to MHAP format...\n");
while (<>) {
	chomp;
	my @t = split;
	next if ($t[0] eq $t[5]); # NB: ignore self matches
	my $cnt = /cm:i:(\d+)/? $1 : 0;
	my $r = $t[9] / $t[10];
	$r = sprintf("%.4f", $is_100? 100. * $r : $r);
	die if !defined($hash{$t[0]}) || !defined($hash{$t[5]});
	print(join(" ", $hash{$t[0]}, $hash{$t[5]}, $r, $cnt, 0, @t[2,3,1], $t[4] eq '+'? 0 : 1, @t[7,8,6]), "\n");
}