File: plot-score-matrix

package info (click to toggle)
last-align 1645-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 14,684 kB
  • sloc: cpp: 44,403; python: 5,212; ansic: 1,938; sh: 710; makefile: 457
file content (72 lines) | stat: -rwxr-xr-x 1,874 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#! /bin/sh
# Author: Martin C. Frith 2025

progName=$(basename $0)

help="Usage: $progName substitution-matrix-file [output.pdf]

Draw an image of a substitution matrix, for example, last-train
output.  If the matrix is codons versus amino acids, the standard
genetic code is shown by dots."

if test $# -lt 1 || test $# -gt 2
then echo "$help" && exit 1
fi

if test "$1" = --help || test "$1" = -h
then echo "$help" && exit 0
fi

matrixFile=$1
pdf=${2-$matrixFile.pdf}

geneticCode="\
FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG"

tmp=plot-score-matrix.$$

awk '/^ *[a-zA-Z*]/' "$matrixFile" | sed 's/ [^ ][^ ]*//65' > $tmp.m

lineCount=$(wc -l < $tmp.m)

columnCount=$(head -n1 $tmp.m | wc -w)

for i in $(seq 64)
do echo "$geneticCode" | cut -c$i | paste -s -
done |
    sort -k2 | awk '{print $1, NR}' | sort > $tmp.x

awk 'NR > 1 {print $1, '"$lineCount"' - NR + 1}' $tmp.m | sort |
    join -o1.2,2.2 $tmp.x - > $tmp.g

las=$(head -n1 $tmp.m | awk '{print (length($1) > 1) ? 2 : 1}')

{
    cat <<EOF
colors <- "Heat 2"
#colors <- "PinkYl"
#colors <- "OrYel"
margin <- 1.8
m <- as.matrix(read.table("$tmp.m", check.names=FALSE))
m <- m[nrow(m):1,]
x <- 1:ncol(m)
y <- 1:nrow(m)
width <- min(7, 1 + 0.14 * ncol(m))
pdf("$pdf", width=width, height=width*nrow(m)/ncol(m), pointsize=8)
par(mar=c(0, margin * ncol(m) / nrow(m), margin, 0))
par(mgp=c(3, 0.25, 0))
par(las=$las)
image(x, y, t(m), col=hcl.colors(12, colors), axes=FALSE, xlab="", ylab="")
axis(2, y, rownames(m), FALSE, family="mono")
axis(3, x, colnames(m), FALSE, family="mono")
EOF

test $columnCount = 64 && cat <<EOF
points(read.table("$tmp.g"), pch=16)
EOF
} | R --vanilla -s

rm $tmp.?