File: star-align

package info (click to toggle)
dnaclust 3-7
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 720 kB
  • sloc: cpp: 3,630; sh: 516; makefile: 64
file content (44 lines) | stat: -rwxr-xr-x 1,501 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/bin/bash
# Similarity value handed to dnaclust through its -s option for each
# alignment run further down in this script.
similarity=0.80

# Print the usage text for star-align on STDOUT.
# The text is piped through fold(1) with -s so that the long paragraph
# lines are wrapped at word boundaries instead of mid-word.
# Takes no arguments; callers decide where to redirect the output.
print_help()
{
    # Quoted delimiter: the help text is literal, nothing in it is expanded.
    fold -s <<'EOF'
Usage: star-align [OPTIONS...]
Build a multiple sequence alignment (MSA) for a set of sequences, using star alignment heuristic.

  -i FILE                    The file containing the sequences in FASTA format.
                             Note that the MSAs are built based on the ids
                             read from STDIN.
  -h                         Give this help list

The ids of the sequences to be aligned are read from STDIN. The MSAs are written to STDOUT. One MSA is generated for each line. The first sequence id on each line is used as the center of the 'star'.

Example: To build a MSA for the first cluster generated by 'dnaclust' do:
head -n 1 sequences.cluster | ./star-align -i sequences.fasta > alignment.fasta
EOF
}

# Parse the command line: -i FILE names the FASTA input, -h shows the help.
while getopts "i:h" option
do
    case "$option" in
        i) input_fasta="$OPTARG" ;;
        h) print_help; exit 0 ;;
        # Unknown option or missing option argument; getopts has already
        # printed its own diagnostic. The help goes to STDERR because STDOUT
        # is the stream the alignments are written to.
        \?) print_help >&2; exit 1 ;;
    esac
done

# -i is required: every later step reads sequences from this file.
if [[ -z "${input_fasta:-}" ]]; then
    echo "star-align: missing required option: -i FILE" >&2
    print_help >&2
    exit 1
fi

# Directory holding this script; the dnaclust binary is run from there.
dnaclust_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Scratch directory created under the current directory and removed on exit.
tempdir=$(mktemp -d -p .) || {
    echo "star-align: cannot create temporary directory" >&2
    exit 1
}
# Single quotes defer expansion until the trap fires, and the path is quoted
# so it is always passed to rm as exactly one argument.
trap 'rm -fr -- "$tempdir"' EXIT

center_fasta=$(mktemp -p "$tempdir") || exit 1
sequences_fasta=$(mktemp -p "$tempdir") || exit 1

# Each STDIN line is one cluster of sequence ids.
# read -r keeps backslashes literal. Reading into an array splits the line on
# whitespace without glob expansion. The extra length test still processes a
# final line that lacks a trailing newline.
while read -r -a ids || (( ${#ids[@]} > 0 ))
do
    # Re-join the ids with single spaces before handing them to fastaselect.
    cluster="${ids[*]}"
    printf '%s\n' "$cluster" | "/usr/lib/dnaclust/fastaselect" -f "$input_fasta" --cluster-centers > "$center_fasta"
    printf '%s\n' "$cluster" | "/usr/lib/dnaclust/fastaselect" -f "$input_fasta" --cluster-sequences > "$sequences_fasta"
    "$dnaclust_path/dnaclust" -s "$similarity" --multiple-alignment --no-k-mer-filter -i "$sequences_fasta" -p "$center_fasta" -r
done