File: dnaclust-abun.sh

package info (click to toggle)
dnaclust 3-7
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 720 kB
  • sloc: cpp: 3,630; sh: 516; makefile: 64
file content (74 lines) | stat: -rw-r--r-- 2,156 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/bin/bash
# DNACLUST helper script: sorts a reference FASTA file with fastasort and runs
# dnaclust on an input FASTA file against the sorted references.

# Defaults; overridden by -r and -t respectively.
similarity=0.98
threads=1

# Start every option variable empty so that a same-named variable exported in
# the caller's environment cannot silently enable a flag or supply a file name.
cluster_centers=""
input=""
de_novo_cluster=""
verbose=""
left_gaps_allowed=""
no_overlap=""

# Print the usage summary on standard output.
# The text is piped through `fold --spaces` so long lines wrap at word
# boundaries. The here-document delimiter is unquoted on purpose: the usage
# line expands ${0##*/} so the help shows the name the script was invoked as.
# NOTE(review): -d is accepted by the option parser, but nothing else in this
# file reads the variable it sets - confirm whether it is still meaningful.
print_help()
{
    fold --spaces <<EOF
Usage: ${0##*/} [OPTIONS...]
DNACLUST helper script to cluster sequences using a reference database.

  -c CENTERS         Fasta file of cluster centers/references.
  -d                 After clustering with reference database, perform de novo clustering.
  -r SIMILARITY      Set similarity between cluster center and cluster
                     sequences (default=0.98)
  -t THREADS         Set the number of threads to use (default=1)
  -i INPUT_FILE      Fasta file of sequences to be clustered.
  -l                 Pass --left-gaps-allowed to dnaclust.
  -n                 Pass --no-overlap to dnaclust.
  -v                 Print verbose messages to standard error
  -h                 Give this help list

The sequences to be clustered are read from INPUT_FILE. The clusters are written to INPUT_FILE.db.clusters. Messages are written to STDERR.
EOF
}

# Parse the command-line flags.
# Options taking a value store it verbatim; boolean flags are marked "on" by
# assigning 0 (any non-empty value counts as set for the later checks).
# -h prints the help and exits 0; an unknown option prints it and exits 1.
while getopts "c:i:dr:t:vhln" option; do
    case "$option" in
        c) cluster_centers="$OPTARG" ;;
        i) input="$OPTARG" ;;
        r) similarity="$OPTARG" ;;
        t) threads="$OPTARG" ;;
        d) de_novo_cluster=0 ;;
        v) verbose=0 ;;
        l) left_gaps_allowed=0 ;;
        n) no_overlap=0 ;;
        h)
            print_help
            exit 0
            ;;
        \?)
            print_help
            exit 1
            ;;
    esac
done

# Write a timestamped message to standard error when verbose mode is on.
# Arguments: $1 - message text
# Verbose mode is on whenever $verbose is non-empty; otherwise nothing is
# printed and the function returns successfully.
print_message()
{
    [[ -n $verbose ]] || return 0
    printf '%s %s\n' "$(date +%T)" "$1" >&2
}

# Main flow: validate the inputs, sort the reference database into a scratch
# directory, then run dnaclust on the input sequences against it.

# Both -c and -i are required: without them the commands below would read
# from the terminal and write to a file literally named ".db.clusters".
if [[ -z $cluster_centers || -z $input ]]
then
    echo "Error: both -c CENTERS and -i INPUT_FILE must be given." >&2
    print_help >&2
    exit 1
fi

# Optional dnaclust flags are collected in an array so that each one is
# passed as its own word without relying on word splitting.
parameters=()
if [[ -n $left_gaps_allowed ]]
then
    parameters+=(--left-gaps-allowed)
fi

if [[ -n $no_overlap ]]
then
    parameters+=(--no-overlap)
fi

# Directory that holds this script; dnaclust is invoked from the same place.
dnaclust_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
print_message "$dnaclust_path"

# Scratch directory under the current directory. Abort if it cannot be
# created, otherwise the sorted database would be written to "/<name>".
tempdir=$(mktemp -d -p .) || {
    echo "Error: cannot create a temporary directory." >&2
    exit 1
}
# Single quotes defer expansion to exit time and keep the path quoted.
trap 'rm -fr -- "$tempdir"' EXIT

# Name of the sorted copy: the reference file's base name with a trailing
# ".fasta" replaced by ".sorted.fasta".
db_sorted="$(basename -- "$cluster_centers" .fasta).sorted.fasta"
print_message "Reading and sorting the database sequences: $tempdir/${db_sorted}"
"/usr/lib/dnaclust/fastasort" < "$cluster_centers" > "$tempdir/$db_sorted" || {
    echo "Error: sorting '$cluster_centers' failed." >&2
    exit 1
}

print_message "Recruiting from the sequences, using database."
# With pipefail a dnaclust failure is no longer hidden behind awk's exit
# status; this pipeline is the last command, so its status is the script's.
set -o pipefail
# awk keeps only the output lines that contain more than one field.
"$dnaclust_path/dnaclust" "${parameters[@]}" -s "$similarity" -t "$threads" --no-k-mer-filter -i "$input" -p "$tempdir/$db_sorted" -r \
    | awk 'NF > 1' > "$input.db.clusters"