File: pickconsensusrep.sh

package info (click to toggle)
mmseqs2 18-8cc5c%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 34,388 kB
  • sloc: cpp: 87,288; ansic: 10,655; sh: 2,919; makefile: 142; perl: 13
file content (82 lines) | stat: -rwxr-xr-x 2,705 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/bin/sh -e

fail() {
    echo "Error: $1"
    exit 1
}

notExists() {
    [ ! -f "$1" ]
}

[ -z "$MMSEQS" ] && echo "Please set the environment variable \$MMSEQS to your MMSEQS binary." && exit 1;
[ "$#" -ne 4 ] && echo "Please provide <seqDB> <clusterDB> <outClusterDB> <tmp>" && exit 1;
[ ! -f "$1.dbtype" ] && echo "$1.dbtype not found!" && exit 1;
[ ! -f "$2.dbtype" ] && echo "$2.dbtype not found!" && exit 1;
[ ! -d "$4" ] && echo "tmp directory $4 not found!" && mkdir -p "$4";
TMP_PATH="$4"

if notExists "${TMP_PATH}/msa.dbtype"; then
  # shellcheck disable=SC2086
  "$MMSEQS" result2msa "$1" "$1" "$2" "$TMP_PATH/msa" ${RESULT2MSA_PAR} \
        || fail "result2msa failed"
fi

if notExists "${TMP_PATH}/profile.dbtype"; then
  # shellcheck disable=SC2086
  "$MMSEQS" msa2profile "$TMP_PATH/msa" "$TMP_PATH/profile" ${MSA2PROFILE_PAR} \
        || fail "result2msa failed"
fi

if notExists "${TMP_PATH}/aln.dbtype"; then
    # shellcheck disable=SC2086
    "$MMSEQS" align "$TMP_PATH/profile" "$1" "$2"  "${TMP_PATH}/aln" || fail "align failed"
fi

if notExists "${TMP_PATH}/aln.tsv"; then
    # shellcheck disable=SC2086
    "$MMSEQS" prefixid "${TMP_PATH}/aln" "${TMP_PATH}/aln.tsv" --tsv ${VERBOSITY} || fail "prefixid1 -tsv failed"
fi

awk 'FNR == NR{ best[$1]=1; rep[$1] = $1; next}
{
    cluster = $1; member = $2; score = $3;
    if (!(cluster in best) || score > best[cluster]) {
        best[cluster] = score;
        rep[cluster] = member;
    }
}
END {
    for (cluster in rep) {
        print cluster "\t" rep[cluster];
    }
}' "${TMP_PATH}/aln.index" "${TMP_PATH}/aln.tsv" > "${TMP_PATH}/rep_mapping.txt"

if notExists "${TMP_PATH}/clu.tsv"; then
    # shellcheck disable=SC2086
    "$MMSEQS" prefixid "$2" "${TMP_PATH}/clu.tsv" --tsv ${VERBOSITY} || fail "prefixid2 -tsv failed"
fi

if notExists "${TMP_PATH}/updated_clu.tsv"; then
    awk 'FNR == NR{f[$1] = $2; next}
         $1 != prev { print f[$1] "\t" f[$1]; prev = $1; }
         $1 in f && $2 != f[$1]{print f[$1]"\t"$2}' "${TMP_PATH}/rep_mapping.txt" "${TMP_PATH}/clu.tsv" > "${TMP_PATH}/updated_clu.tsv"
fi

"$MMSEQS" tsv2db "${TMP_PATH}/updated_clu.tsv" "${3}" --output-dbtype 6 || fail "tsv2db failed"

if [ -n "$REMOVE_TMP" ]; then
    rm -f "${TMP_PATH}/updated_clu.tsv"
    rm -f "${TMP_PATH}/aln.tsv"
    rm -f "${TMP_PATH}/rep_mapping.txt"
    rm -f "${TMP_PATH}/clu.tsv"
    rm -f "${TMP_PATH}/updated_clu.tsv"
    # shellcheck disable=SC2086
    "$MMSEQS" rmdb "${TMP_PATH}/msa" ${VERBOSITY}
    # shellcheck disable=SC2086
    "$MMSEQS" rmdb "${TMP_PATH}/profile" ${VERBOSITY}
    # shellcheck disable=SC2086
    "$MMSEQS" rmdb "${TMP_PATH}/aln" ${VERBOSITY}

    rm -rf "${TMP_PATH}/pickconsensusrep.sh"
fi