1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
|
#!/usr/bin/perl -w
#
# convert_quals.pl
#
# Modify scale/encoding of quality values in a FASTQ file.
#
# Author: Ben Langmead
# Date: 5/5/2009
#
# p = probability that base is miscalled
# Qphred = -10 * log10 (p)
# Qsolexa = -10 * log10 (p / (1 - p))
# See: http://en.wikipedia.org/wiki/FASTQ_format
#
use strict;
use warnings;
use Getopt::Long;
my $inphred = 33;
my $insolexa = 0;
my $outphred = 0;
my $outsolexa = 64;
# Default: convert 33-based Phred quals into 64-based Solexa qualss
my $result =
GetOptions ("inphred=i" => \$inphred,
"insolexa=i" => \$insolexa,
"outphred=i" => \$outphred,
"outsolexa=i" => \$outsolexa);
$result == 1 || die "One or more errors parsing script arguments";
if($inphred > 0) {
$inphred >= 33 || die "Input base must be >= 33, was $inphred";
} else {
$insolexa >= 33 || die "Input base must be >= 33, was $insolexa";
}
sub log10($) {
return log(shift) / log(10.0);
}
sub round {
my($number) = shift;
return int($number + .5 * ($number <=> 0));
}
# Convert from phred qual to probability of miscall
sub phredToP($) {
my $phred = shift;
my $p = (10.0 ** (($phred) / -10.0));
($p >= 0.0 && $p <= 1.0) || die "Bad prob: $p, from sol $phred";
return $p;
}
# Convert from solexa qual to probability of miscall
sub solToP($) {
my $sol = shift;
my $x = (10.0 ** (($sol) / -10.0));
my $p = $x / (1.0 + $x);
($p >= 0.0 && $p <= 1.0) || die "Bad prob: $p, from x $x, phred $sol";
return $p;
}
# Convert from probability of miscall to phred qual
sub pToPhred($) {
my $p = shift;
($p >= 0.0 && $p <= 1.0) || die "Bad prob: $p";
return round(-10.0 * log10($p));
}
# Convert from probability of miscall to solexa qual
sub pToSol($) {
my $p = shift;
($p >= 0.0 && $p <= 1.0) || die "Bad prob: $p";
return 0.0 if $p == 1.0;
return round(-10.0 * log10($p / (1.0 - $p)));
}
while(<>) {
my $name = $_; print $name;
my $seq = <>; print $seq;
my $name2 = <>; print $name2;
my $quals = <>;
chomp($quals);
my @qual = split(//, $quals);
for(my $i = 0; $i <= $#qual; $i++) {
my $co = ord($qual[$i]);
my $p;
# Convert input qual to p
if($inphred > 0) {
$co -= $inphred;
$co >= 0 || die "Bad Phred input quality: $co";
$p = phredToP($co);
} else {
$co -= $insolexa;
$p = solToP($co);
}
# Convert p to output qual
if($outphred > 0) {
$co = pToPhred($p);
$co >= 0 || die "Bad Phred output quality: $co";
$co += $outphred;
} else {
$co = pToSol($p);
$co += $outsolexa;
}
$co >= 33 || die "Error: Output qual " . $co . " char is less than 33. Try a larger output base.";
print chr($co);
}
print "\n";
}
|