File: seqsero_batch_pair-end

package info (click to toggle)
seqsero 1.0.1%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 4,232 kB
  • sloc: python: 2,447; perl: 82; sh: 55; makefile: 7
file content (47 lines) | stat: -rwxr-xr-x 1,370 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/bin/sh

# Validate the command line: exactly one argument is required and it must
# name an existing directory.  Diagnostics go to stderr and the script exits
# with a non-zero status so that callers can detect the failure.
if [ "$#" -ne 1 ] ; then
    cat >&2 <<EOT
Usage: ${0##*/} <dir_with_sequence_pairs>

This script seeks a directory for paired-end reads and processes
all pairs through seqsero using -m2 option.  If a sequence pair
was processed before it will be skipped.
EOT
    exit 1
fi

# The argument is quoted so that an empty string or a path containing
# whitespace is tested as a single operand instead of confusing `test`.
if [ ! -d "$1" ] ; then
    echo "$1 needs to be a directory" >&2
    exit 1
fi

# Print how many of the given path arguments exist.  An unmatched glob is
# passed through as the literal pattern, which does not exist and therefore
# counts as zero.  Called inside $(...), so its variables do not leak.
count_existing() {
    count=0
    for candidate in "$@" ; do
        if [ -e "$candidate" ] ; then
            count=$((count + 1))
        fi
    done
    echo "$count"
}

# Count forward (R1) reads; without any there is nothing to process.
numR1=$(count_existing "$1"/*_R1_001.fastq.gz)
if [ "$numR1" -eq 0 ] ; then
    echo "No sequences found in directory $1" >&2
    exit 1
fi

# Count reverse (R2) reads.  A mismatch is only a warning: matching pairs
# are still looked up individually later on.
numR2=$(count_existing "$1"/*_R2_001.fastq.gz)
if [ "$numR1" -ne "$numR2" ] ; then
    cat >&2 <<EOT
Warning: suspicious number of sequences R1=$numR1 and R2=$numR2
Both values should be equal.  Please check that your data are valid.
Continuing processing anyway trying to find matching pairs.
EOT
fi

# Run seqsero on every R1/R2 pair in the target directory, skipping pairs
# for which an earlier result file already lists the same two input files.
# Abort if the directory cannot be entered, so that files in the current
# directory are never processed by mistake.
cd -- "$1" || exit 1
for seq1 in *_R1_001.fastq.gz ; do
   # An unmatched glob is left as the literal pattern; skip it.
   [ -e "$seq1" ] || continue

   # Derive the mate's name by swapping the R1 suffix for the R2 suffix.
   seq2="${seq1%_R1_001.fastq.gz}_R2_001.fastq.gz"
   if [ ! -e "$seq2" ] ; then
      printf '%s\n' "No matching sequence found for $seq1.  File $seq2 does not exist." >&2
      continue
   fi

   # Escape regex metacharacters so the file names are matched literally
   # (an unescaped "." would match any character).
   escaped_pair=$(printf '%s\n' "$seq1 $seq2" | sed 's/[][\.*^$]/\\&/g')

   # -l prints the name of each matching result file, even when only one
   # file is searched; -s silences the error raised when no result
   # directory exists yet.  Only the first match is reported.
   check_old_results=$(grep -l -s -e "^Input files:[[:space:]][[:space:]]*${escaped_pair}\$" \
      SeqSero_result*/Seqsero_result.txt | head -n 1)
   if [ -n "$check_old_results" ] ; then
      printf '%s\n' "Calculation for $seq1 was done previously and can be found in $check_old_results"
   else
      seqsero -m 2 -i "$seq1" "$seq2"
   fi
done