1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
|
#!/bin/bash
# This file parses UCSC Chromosome Band table into a Go source code file.
#
# This script will only work on the Chromosome Band table - it will not
# work on an 'Assembly' or 'Scaffold' table.
#
# The prefix, e.g. chr, will be used to label the chromosomes (e.g. chr1, chr2 ... )
# By default, "chr" is used. The package will be used to name the generated package.
#
# To download data tables, see http://genome.ucsc.edu/cgi-bin/hgTables
file=$1
prefix=$2
species=$3
package=$4
if [ ! -n "$file" ]; then
echo "Please specify the UCSC karyotype table file"
exit
fi
if [ ! -n "$prefix" ]; then
prefix="chr"
fi
label="$(tr '[:lower:]' '[:upper:]' <<< ${prefix:0:1})${prefix:1}"
(
echo -e "// DO NOT EDIT. This file was autogenerated by parse.karyotype\n"
echo "// Package $package defines chromosome and band intervals for the $species karyotype based on the $package assembly."
echo -e "package $package\n"
echo -e "import (\n\t\"github.com/biogo/biogo/feat\"\n\t\"github.com/biogo/biogo/feat/genome\"\n)\n"
# # chromosomes
echo 'var ('
< $file zcat \
| grep -v '^#' \
| sed -e 's/\t/ /g' -e 's/chr//' | tr -s ' ' \
| awk '{print $1,$0}' \
| sed -e 's/^[XZ]/1.1e10/' -e 's/^[YW]/1.2e10/' -e 's/^M/1.3e10/' \
| sed -e 's/^\([1-9][0-9]*\)[lL]/\1/' -e 's/^\([1-9][0-9]*\)[rR]/\1.5/' \
| sort -k1,1g -k3rn,3 \
| sort -u -k1g,1 \
| awk -v prefix=$prefix -v label=$label '{print "\t"label$2" = genome.Chromosome{Chr: \""prefix$2"\", Desc: \"Chromosome\", Length:",$4"}"}'
echo -e ')\n'
echo 'var Chromosomes = []*genome.Chromosome{'
< $file zcat \
| grep -v '^#' \
| sed -e 's/\t/ /g' -e 's/chr//' | tr -s ' ' \
| awk '{print $1,$0}' \
| sed -e 's/^[XZ]/1.1e10/' -e 's/^[YW]/1.2e10/' -e 's/^M/1.3e10/' \
| sed -e 's/^\([1-9][0-9]*\)[lL]/\1/' -e 's/^\([1-9][0-9]*\)[rR]/\1.5/' \
| sort -k1,1g -k3rn,3 \
| sort -u -k1g,1 \
| awk -v label=$label '{print "\t&"label$2","}'
echo -e '}\n'
# bands
echo 'var ('
< $file zcat \
| grep -v '^#' \
| sed -e 's/\t/ /g' -e 's/chr//' | tr -s ' ' \
| awk '{print $1,$0}' \
| sed -e 's/^[XZ]/1.1e10/' -e 's/^[YW]/1.2e10/' -e 's/^M/1.3e10/' \
| sed -e 's/^\([1-9][0-9]*\)[lL]/\1/' -e 's/^\([1-9][0-9]*\)[rR]/\1.5/' \
| sort -k1,1g -k3n,3 \
| awk -v label=$label '{print "\t"label$2"_"$5" = genome.Band{Band: \""$5"\", Desc: \"Band\", StartPos:",$3", EndPos:",$4", Giemsa: \""$6"\", Chr: &"label$2"}"}' \
| sed 's/\.\(.*=\)/_\1/'
echo -e ')\n'
echo 'var Bands = []*genome.Band{'
< $file zcat \
| grep -v '^#' \
| sed -e 's/\t/ /g' -e 's/chr//' | tr -s ' ' \
| awk '{print $1,$0}' \
| sed -e 's/^[XZ]/1.1e10/' -e 's/^[YW]/1.2e10/' -e 's/^M/1.3e10/' \
| sed -e 's/^\([1-9][0-9]*\)[lL]/\1/' -e 's/^\([1-9][0-9]*\)[rR]/\1.5/' \
| sort -k1,1g -k3n,3 \
| awk -v label=$label '{print "\t&"label$2"_"$5","}' \
| sed 's/\./_/'
echo '}'
# init
cat << 'END'
//line parse.karyotype:82
func init() {
for _, b := range Bands {
b.Chr.(*genome.Chromosome).Features = append(b.Chr.(*genome.Chromosome).Features, b)
}
for _, c := range Chromosomes {
bc := make([]feat.Feature, len(c.Features))
copy(bc, c.Features)
c.Features = bc
}
}
END
) | gofmt
|