File: web_pipeline

package info (click to toggle)
assemblytics 1.0%2Bds-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 416 kB
  • sloc: python: 564; perl: 234; php: 124; sh: 94; makefile: 11
file content (118 lines) | stat: -rwxr-xr-x 4,976 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/bin/bash
# One-line synopsis printed after every argument error below.
USAGE="Assemblytics delta output_prefix unique_length_required min_size max_size"

#######################################
# Report a missing positional argument and abort the script.
# Globals:   USAGE (read)
# Arguments: $1 - text appended to "ERROR in Assemblytics: "
# Outputs:   the error line, "Usage:" and the synopsis, on stdout
#            (same stream and wording as before)
# Exits:     status 1. A bare `exit` would return the status of the
#            preceding echo, i.e. 0, and hide the failure from the caller.
#######################################
usage_error() {
    echo "ERROR in Assemblytics: $1"
    echo "Usage:"
    echo "$USAGE"
    exit 1
}

# All five positional arguments are mandatory; an empty string counts as
# missing. They are checked in order, so only the first problem is reported.
[ -n "$1" ] || usage_error "No delta file given"
[ -n "$2" ] || usage_error "No output prefix given"
[ -n "$3" ] || usage_error "No unique length requirement parameter given"
[ -n "$4" ] || usage_error "No minimum size parameter given"
[ -n "$5" ] || usage_error "No maximum size parameter given"


# Author: Maria Nattestad
# Email: mnattest@cshl.edu

# Bind the positional arguments to named variables. ${N:?msg} aborts the
# script with the usage synopsis when an argument is unset OR empty.
DELTA=${1:?"$USAGE"}
OUTPUT_PREFIX=${2:?"$USAGE"}
UNIQUE_LENGTH=${3:?"$USAGE"}
MINIMUM_SIZE=${4:?"$USAGE"}
MAXIMUM_SIZE=${5:?"$USAGE"}

# Echo the inputs on stderr. The values are quoted so that whitespace or glob
# characters in a path are printed verbatim instead of being split/expanded.
>&2 echo "Input delta file: $DELTA"
>&2 echo "Output prefix: $OUTPUT_PREFIX"

# Status records are appended to LOG_FILE, which here is standard output.
LOG_FILE=/dev/stdout


# First record: the output prefix with any leading directory components
# stripped. printf is used so a value such as "-n" is not taken as an option.
printf '%s\n' "${OUTPUT_PREFIX##*/}" >> "$LOG_FILE"

echo "STARTING,DONE,Starting unique anchor filtering." >> "$LOG_FILE"

# Run the pipeline steps in order. After each step one "STEP,DONE|FAIL,message"
# record is appended to LOG_FILE; a FAIL skips all remaining steps.
# Progress banners go to stderr.

# Intermediate and final file names, all derived from the output prefix.
filtered_delta="$OUTPUT_PREFIX.Assemblytics.unique_length_filtered_l$UNIQUE_LENGTH.delta.gz"
coords_tab="$OUTPUT_PREFIX.coords.tab"
between_bed="$OUTPUT_PREFIX.variants_between_alignments.bed"
within_bed="$OUTPUT_PREFIX.variants_within_alignments.bed"
combined_bed="$OUTPUT_PREFIX.Assemblytics_structural_variants.bed"
summary_file="$OUTPUT_PREFIX.Assemblytics_structural_variants.summary"

>&2 echo "1. Filter delta file"
./bin/Assemblytics_uniq_anchor.py --delta "$DELTA" --unique-length "$UNIQUE_LENGTH" --out "$OUTPUT_PREFIX" --keep-small-uniques
# Step 1 counts as successful when the filtered delta file exists afterwards.
if [ -e "$filtered_delta" ];
then
    echo "UNIQFILTER,DONE,Step 1: Assemblytics_uniq_anchor.py completed successfully. Now finding variants between alignments." >> "$LOG_FILE"
    >&2 echo "2. Finding variants between alignments"

    # The shell creates the redirect target before the command runs, so the
    # file exists even when the command fails. Success is therefore judged by
    # the exit status, not by the presence of the output file.
    if ./bin/Assemblytics_between_alignments.pl "$coords_tab" "$MINIMUM_SIZE" "$MAXIMUM_SIZE" all-chromosomes exclude-longrange bed > "$between_bed";
    then
        echo "BETWEEN,DONE,Step 2: Assemblytics_between_alignments.pl completed successfully. Now finding variants within alignments." >> "$LOG_FILE"
        >&2 echo "3. Finding variants within alignments"

        # Same reasoning as step 2: check the exit status, not the file.
        if ./bin/Assemblytics_within_alignment.py --delta "$filtered_delta" --min "$MINIMUM_SIZE" > "$within_bed";
        then
            echo "WITHIN,DONE,Step 3: Assemblytics_within_alignment.py completed successfully. Now combining the two sets of variants together." >> "$LOG_FILE"
            >&2 echo "4. Combine variants between and within alignments";

            # Header row followed by both variant sets. The group's status is
            # that of cat, so an unreadable input is reported as a failure.
            if {
                printf 'reference\tref_start\tref_stop\tID\tsize\tstrand\ttype\tref_gap_size\tquery_gap_size\tquery_coordinates\tmethod\n'
                cat "$within_bed" "$between_bed"
            } > "$combined_bed";
            then
                echo "COMBINE,DONE,Step 4: Variants combined successfully. Now generating figures and summary statistics." >> "$LOG_FILE"

                Rscript ./bin/Assemblytics_variant_charts.R "$OUTPUT_PREFIX" "$MINIMUM_SIZE" "$MAXIMUM_SIZE"

                ./bin/Assemblytics_index.py -coords "$OUTPUT_PREFIX.coords.csv" -out "$OUTPUT_PREFIX"

                # Rscript ./bin/Assemblytics_dotplot.R $OUTPUT_PREFIX

                # Distinct (column 7, column 5) and (column 8, column 6) pairs
                # from the coords table, ordered by the second output field,
                # numerically descending.
                awk '{print $7,$5}' OFS='\t' < "$coords_tab" | sort | uniq | sort -k2,2nr > "$OUTPUT_PREFIX.coords.ref.genome"
                awk '{print $8,$6}' OFS='\t' < "$coords_tab" | sort | uniq | sort -k2,2nr > "$OUTPUT_PREFIX.coords.query.genome"

                Rscript ./bin/Assemblytics_Nchart.R "$OUTPUT_PREFIX"

                ./bin/Assemblytics_summary.py -i "$combined_bed" -min "$MINIMUM_SIZE" -max "$MAXIMUM_SIZE" > "$summary_file"

                # The summary is accepted when it contains the word "Total".
                if grep -q "Total" "$summary_file";
                then
                    echo "SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully" >> "$LOG_FILE"
                else
                    echo "SUMMARY,FAIL,Step 5: Assemblytics_summary.py failed" >> "$LOG_FILE"
                fi

            else
                echo "COMBINE,FAIL,Step 4: combining variants failed" >> "$LOG_FILE"
            fi
        else

            echo "WITHIN,FAIL,Step 3: Assemblytics_within_alignment.py failed: Possible problem before this step or with Python on server." >> "$LOG_FILE"
        fi
    else
        echo "BETWEEN,FAIL,Step 2: Assemblytics_between_alignments.pl failed: Possible problem with Perl or show-coords on server." >> "$LOG_FILE"
    fi
else
    echo "UNIQFILTER,FAIL,Step 1: Assemblytics_uniq_anchor.py failed: Possible problem with Python or Python packages on server." >> "$LOG_FILE"
fi