File: determine-phred

package info (click to toggle)
ea-utils 1.1.2+dfsg-5
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster, sid
  • size: 840 kB
  • sloc: ansic: 11,459; cpp: 3,752; perl: 426; makefile: 74; sh: 42
file content (86 lines) | stat: -rwxr-xr-x 1,611 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/perl
use strict;

my $ssiz=7000;			# sample size

if ($ARGV[0] =~ /^-[h?]/) {
	print "Usage: determine-phred FILE

Reads a sam, fastq or pileup, possibly gzipped and returns the phred-scale, 
  either 64 or 33, based on a quick scan of the data in the file.
";
	exit 0;
}
my $cnt;
my $dphred = 64;
if ($ARGV[0] =~ /\.gz$/) {
	$ARGV[0] = "gunzip -c '$ARGV[0]'|";
}
my $qual;
my $comm;
my $fmt;
if (@ARGV > 1) {
	my @mult = @ARGV;
	for my $f (@mult) {
		@ARGV = ($f);
		determine();
		print "$f\t$dphred\n";
	}
} else {
	determine();
	print "$dphred\n";
}

sub determine {
	$_ = <>;
	if (/^\@/ && ! /^\@SQ\t/) {
		# fastq
		scalar <>;              # read
		$comm = scalar <>;      # comment
		if (!(substr($comm,0,1) eq '+')) {
			die "Unknown file format\n";
		}
		$qual = <>;
		chomp $qual;
		$fmt = 'fq';
	} elsif (/^\S+\t\d+\t[ACTGN]\t\d+\t\S+\t(\S+)$/i) {
		$qual = $1;
		$fmt = 'pileup';
	} else {
		# sam
		$fmt = 'sam';
		$qual = (split(/\t/, $_))[10];
	}
	if (!$qual) {
		die "Unknown file format\n";
	}
    my $rc = 1;
	while($qual) {
        ++$rc;
		for (my $i =length($qual)/2; $i < length($qual); ++$i) {
			if (ord(substr($qual,$i,1)) < 64) {
				$dphred = 33;
				$cnt=$ssiz;	# last
				last;
			}
		}
		$qual = '';
		last if ++$cnt >= $ssiz;	# got enough
		if ($fmt eq 'fq') {
			# fastq
			last if ! scalar <>;		# id
			last if ! scalar <>;		# read
			last if ! scalar <>;		# comment
			$qual = <>;
			chomp $qual;
		} elsif ($fmt eq 'pileup') {
			$qual = (split(/\t/, $_))[5];
		} else {
			# sam
			$qual = (split(/\t/, $_))[10];
		}
	}
    if ($rc < 10) {
		$dphred = 33;
    }
}