File: colorize_fasta.pl

package info (click to toggle)
bowtie 1.2.2%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 16,704 kB
  • sloc: cpp: 35,614; perl: 5,903; ansic: 1,247; sh: 1,128; python: 483; makefile: 426
file content (46 lines) | stat: -rwxr-xr-x 1,162 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/usr/bin/perl -w

##
# colorize_fasta.pl
#
# Convert nucleotide FASTA input to colorspace FASTA output.
# Colorspace versions of nucleotide sequences are 1 character shorter.
# Names are unchanged.  No primer base is given.
#

##
# Translate a nucleotide string into colorspace.
#
# Arguments:
#   $s    - the nucleotide string; the sub dies if it is undefined.
#   $nucs - if true, each color is written as a letter (0/1/2/3/. become
#           A/C/G/T/N); if false, the color characters 0-3 and '.' are
#           written directly.
#
# Returns one color per pair of adjacent characters, so the result is one
# character shorter than $s (and empty when $s has fewer than 2 characters).
# Dies with "Bad dinuc: ..." on a pair that is not in the lookup table.
#
sub colorize($$) {
	my ($s, $nucs) = @_;
	die unless defined $s;

	# Dinucleotide -> color.  Any pair involving N maps to '.'.
	my %color_of;
	@color_of{qw(AA CC GG TT)} = ("0") x 4;
	@color_of{qw(AC CA GT TG)} = ("1") x 4;
	@color_of{qw(AG GA CT TC)} = ("2") x 4;
	@color_of{qw(AT TA CG GC)} = ("3") x 4;
	@color_of{qw(NA NC NG NT AN CN GN TN NN)} = (".") x 9;

	# Color -> letter, used only when the caller asked for letters.
	my %letter_for = ("0" => "A", "1" => "C", "2" => "G", "3" => "T", "." => "N");

	my @colors;
	# The range is empty when $s has fewer than 2 characters.
	for my $pos (0 .. length($s) - 2) {
		my $pair = uc substr($s, $pos, 2);
		# Fold '-' and every ambiguity code into N before the lookup.
		$pair =~ tr/-NnMmRrWwSsYyKkVvHhDdBbXx/N/;
		my $color = $color_of{$pair};
		die "Bad dinuc: $pair\n" unless defined $color;
		push @colors, ($nucs ? $letter_for{$color} : $color);
	}
	return join("", @colors);
}

##
# Main loop: read FASTA from the files named on the command line (or from
# standard input) and print colorspace FASTA to standard output.
#
# Lines starting with ';' or '#' are dropped and '>' name lines are echoed
# unchanged.  A sequence may be wrapped over several lines, so the last base
# of each sequence line is carried into the next line of the same record.
# That way the color spanning the line break is emitted too, and the record
# as a whole comes out exactly 1 character shorter than its input.
#
my $carry = "";  # last base seen in the current record; "" at record start
while(my $line = <>) {
	next if $line =~ /^;/;
	next if $line =~ /^#/;
	if($line =~ /^>/) {
		print $line;
		$carry = "";  # new record: nothing to carry across the name line
	} else {
		# Strip the line terminator, including the CR of CRLF input, which
		# colorize() would otherwise reject as a bad dinucleotide.
		$line =~ s/[\r\n]+\z//;
		print colorize($carry.$line, 0)."\n";
		# An empty line leaves the carried base untouched.
		$carry = substr($line, -1) if length($line) > 0;
	}
}