File: convert_quals.pl

package info (click to toggle)
bowtie 1.3.1-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 15,816 kB
  • sloc: cpp: 37,094; perl: 5,806; ansic: 1,474; sh: 1,197; python: 462; makefile: 419
file content (114 lines) | stat: -rwxr-xr-x 2,662 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/perl -w

#
# convert_quals.pl
#
# Modify scale/encoding of quality values in a FASTQ file.
#
# Author: Ben Langmead
#   Date: 5/5/2009
#
# p = probability that base is miscalled
# Qphred = -10 * log10 (p)
# Qsolexa = -10 * log10 (p / (1 - p))
# See: http://en.wikipedia.org/wiki/FASTQ_format
#

use strict;
use warnings;
use Getopt::Long;

my $inphred   = 33;
my $insolexa  = 0;
my $outphred  = 0;
my $outsolexa = 64;

# Default: convert 33-based Phred quals into 64-based Solexa qualss

my $result =
	GetOptions ("inphred=i"   => \$inphred,
	            "insolexa=i"  => \$insolexa,
	            "outphred=i"  => \$outphred,
	            "outsolexa=i" => \$outsolexa);
$result == 1 || die "One or more errors parsing script arguments";

if($inphred > 0) {
	$inphred >= 33 || die "Input base must be >= 33, was $inphred";
} else {
	$insolexa >= 33 || die "Input base must be >= 33, was $insolexa";
}

sub log10($) {
	return log(shift) / log(10.0);
}

sub round {
    my($number) = shift;
    return int($number + .5 * ($number <=> 0));
}

# Convert from phred qual to probability of miscall
sub phredToP($) {
	my $phred = shift;
	my $p = (10.0 ** (($phred) / -10.0));
	($p >= 0.0 && $p <= 1.0) || die "Bad prob: $p, from sol $phred";
	return $p;
}

# Convert from solexa qual to probability of miscall
sub solToP($) {
	my $sol = shift;
	my $x = (10.0 ** (($sol) / -10.0));
	my $p = $x / (1.0 + $x);
	($p >= 0.0 && $p <= 1.0) || die "Bad prob: $p, from x $x, phred $sol";
	return $p;
}

# Convert from probability of miscall to phred qual
sub pToPhred($) {
	my $p = shift;
	($p >= 0.0 && $p <= 1.0) || die "Bad prob: $p";
	return round(-10.0 * log10($p));
}

# Convert from probability of miscall to solexa qual
sub pToSol($) {
	my $p = shift;
	($p >= 0.0 && $p <= 1.0) || die "Bad prob: $p";
	return 0.0 if $p == 1.0;
	return round(-10.0 * log10($p / (1.0 - $p)));
}

while(<>) {
	my $name = $_;  print $name;
	my $seq = <>;   print $seq;
	my $name2 = <>; print $name2;
	my $quals = <>;
	chomp($quals);
	my @qual = split(//, $quals);
	for(my $i = 0; $i <= $#qual; $i++) {
		my $co = ord($qual[$i]);
		my $p;
		# Convert input qual to p
		if($inphred > 0) {
			$co -= $inphred;
			$co >= 0 || die "Bad Phred input quality: $co";
			$p = phredToP($co);
		} else {
			$co -= $insolexa;
			$p = solToP($co);
		}
		# Convert p to output qual
		if($outphred > 0) {
			$co = pToPhred($p);
			$co >= 0 || die "Bad Phred output quality: $co";
			$co += $outphred;
		} else {
			$co = pToSol($p);
			$co += $outsolexa;
		}
		$co >= 33 || die "Error: Output qual " . $co . " char is less than 33.  Try a larger output base.";
		print chr($co);
	}
	print "\n";
}