File: Assemblytics

package info (click to toggle)
assemblytics 1.2.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 208 kB
  • sloc: python: 577; perl: 234; sh: 110; makefile: 13
file content (143 lines) | stat: -rwxr-xr-x 6,056 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/bin/bash

# Author: Maria Nattestad
# github.com/marianattestad/assemblytics

USAGE="Assemblytics delta output_prefix unique_length_required min_size max_size"

if [ -z "$1" ]
  then
    echo "ERROR in Assemblytics: No delta file given"
    echo "Usage:"
    echo $USAGE
    exit
fi
if [ -z "$2" ]
  then
    echo "ERROR in Assemblytics: No output prefix given"
    echo "Usage:"
    echo $USAGE
    exit
fi
if [ -z "$3" ]
  then
    echo "ERROR in Assemblytics: No unique length requirement parameter given"
    echo "Usage:"
    echo $USAGE
    exit
fi
if [ -z "$4" ]
  then
    echo "ERROR in Assemblytics: No minimum size parameter given"
    echo "Usage:"
    echo $USAGE
    exit
fi
if [ -z "$5" ]
  then
    echo "ERROR in Assemblytics: No maximum size parameter given"
    echo "Usage:"
    echo $USAGE
    exit
fi

DELTA=${1?"$USAGE"}
OUTPUT_PREFIX=${2?"$USAGE"}
UNIQUE_LENGTH=${3?"$USAGE"}
MINIMUM_SIZE=${4?"$USAGE"}
MAXIMUM_SIZE=${5?"$USAGE"}

>&2 echo Input delta file: $DELTA
>&2 echo Output prefix: $OUTPUT_PREFIX
>&2 echo Unique anchor length: $UNIQUE_LENGTH
>&2 echo Minimum variant size to call: $MINIMUM_SIZE
>&2 echo Maximum variant size to call: $MAXIMUM_SIZE


mkdir -p ${OUTPUT_PREFIX%/*}/

# In the web application, this log file is used to show status updates in the GUI
LOG_FILE=${OUTPUT_PREFIX%/*}/progress.log
>&2 echo "Logging progress updates in $LOG_FILE"

SCRIPT_PATH=$(dirname $BASH_SOURCE)
echo 'script path:' $SCRIPT_PATH

# Check that Rscript is available (needed by R scripts)
if [ -z $(which Rscript) ]; then
    >&2 echo "Error: No Rscript executable available in PATH: $PATH."
    exit
fi


echo "${OUTPUT_PREFIX##*/}" >> $LOG_FILE

echo "STARTING,DONE,Starting unique anchor filtering." >> $LOG_FILE

>&2 echo "1. Filter delta file"
$SCRIPT_PATH/Assemblytics_uniq_anchor.py --delta $DELTA --unique-length $UNIQUE_LENGTH --out $OUTPUT_PREFIX --keep-small-uniques
if [ -e $OUTPUT_PREFIX.Assemblytics.unique_length_filtered_l$UNIQUE_LENGTH.delta.gz ];
then
    echo "UNIQFILTER,DONE,Step 1: Assemblytics_uniq_anchor.py completed successfully. Now finding variants between alignments." >> $LOG_FILE
    >&2 echo "2. Finding variants between alignments"
    $SCRIPT_PATH/Assemblytics_between_alignments.pl $OUTPUT_PREFIX.coords.tab $MINIMUM_SIZE $MAXIMUM_SIZE all-chromosomes exclude-longrange bed > $OUTPUT_PREFIX.variants_between_alignments.bed

    if [ -e $OUTPUT_PREFIX.variants_between_alignments.bed ];
    then
        echo "BETWEEN,DONE,Step 2: Assemblytics_between_alignments.pl completed successfully. Now finding variants within alignments." >> $LOG_FILE
        >&2 echo "3. Finding variants within alignments"
        $SCRIPT_PATH/Assemblytics_within_alignment.py --delta $OUTPUT_PREFIX.Assemblytics.unique_length_filtered_l$UNIQUE_LENGTH.delta.gz --min $MINIMUM_SIZE --output $OUTPUT_PREFIX.variants_within_alignments.bed

        if [ -e $OUTPUT_PREFIX.variants_within_alignments.bed ];
        then
            echo "WITHIN,DONE,Step 3: Assemblytics_within_alignment.py completed successfully. Now combining the two sets of variants together." >> $LOG_FILE
            >&2 echo "4. Combine variants between and within alignments";
            HEADER="#reference\tref_start\tref_stop\tID\tsize\tstrand\ttype\tref_gap_size\tquery_gap_size\tquery_coordinates\tmethod"
            cat <(echo -e $HEADER) $OUTPUT_PREFIX.variants_within_alignments.bed $OUTPUT_PREFIX.variants_between_alignments.bed > $OUTPUT_PREFIX.Assemblytics_structural_variants.bed

            if [ -e $OUTPUT_PREFIX.Assemblytics_structural_variants.bed ];
            then 
                echo "COMBINE,DONE,Step 4: Variants combined successfully. Now generating figures and summary statistics." >> $LOG_FILE
                
                $SCRIPT_PATH/Assemblytics_variant_charts.R $OUTPUT_PREFIX $MINIMUM_SIZE $MAXIMUM_SIZE

                $SCRIPT_PATH/Assemblytics_index.py -coords $OUTPUT_PREFIX.coords.csv -out $OUTPUT_PREFIX
                
                $SCRIPT_PATH/Assemblytics_dotplot.R $OUTPUT_PREFIX

                cat $OUTPUT_PREFIX.coords.tab | awk '{print $7,$5}' OFS='\t' | sort | uniq | sort -k2,2nr >  $OUTPUT_PREFIX.coords.ref.genome
                cat $OUTPUT_PREFIX.coords.tab | awk '{print $8,$6}' OFS='\t' | sort | uniq | sort -k2,2nr >  $OUTPUT_PREFIX.coords.query.genome
                
                $SCRIPT_PATH/Assemblytics_Nchart.R $OUTPUT_PREFIX
                
                $SCRIPT_PATH/Assemblytics_summary.py -i $OUTPUT_PREFIX.Assemblytics_structural_variants.bed -min $MINIMUM_SIZE -max $MAXIMUM_SIZE > $OUTPUT_PREFIX.Assemblytics_structural_variants.summary

                # zip up results for quick download of all results
                zip $OUTPUT_PREFIX.Assemblytics_results.zip $OUTPUT_PREFIX.Assemblytics*
                
                # create a small preview file of the variants for the GUI
                head $OUTPUT_PREFIX.Assemblytics_structural_variants.bed | column -t > $OUTPUT_PREFIX.variant_preview.txt

                if grep -q "Total" $OUTPUT_PREFIX.Assemblytics_structural_variants.summary; 
                then
                    echo "SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully" >> $LOG_FILE
                else
                    echo "SUMMARY,FAIL,Step 5: Assemblytics_summary.py failed" >> $LOG_FILE
                    exit 1
                fi
            else
                echo "COMBINE,FAIL,Step 4: combining variants failed" >> $LOG_FILE
                exit 1
            fi
        else
            echo "WITHIN,FAIL,Step 3: Assemblytics_within_alignment.py failed: Possible problem before this step or with Python on server." >> $LOG_FILE
            exit 1
        fi
    else
        echo "BETWEEN,FAIL,Step 2: Assemblytics_between_alignments.pl failed: Possible problem with Perl or show-coords on server." >> $LOG_FILE
        exit 1
    fi
else
    echo "UNIQFILTER,FAIL,Step 1: Assemblytics_uniq_anchor.py failed: Possible problem with Python or Python packages on server." >> $LOG_FILE
    exit 1
fi