File: demo-worm-pacbio.sh

package info (click to toggle)
miniasm 0.3+dfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 632 kB
  • sloc: ansic: 2,528; perl: 120; sh: 66; makefile: 29
file content (56 lines) | stat: -rwxr-xr-x 6,613 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
prefix=ce-40X

# Manifest of the read files: one subreads FASTA URL per line, written to
# $prefix.files so the download step can hand the whole list to wget.
printf '%s\n' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0001/Analysis_Results/m140928_184123_42139_c100719602550000001823155305141590_s1_p0.1.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0001/Analysis_Results/m140928_184123_42139_c100719602550000001823155305141590_s1_p0.2.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0001/Analysis_Results/m140928_184123_42139_c100719602550000001823155305141590_s1_p0.3.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0002/Analysis_Results/m140928_230547_42139_c100719602550000001823155305141591_s1_p0.1.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0002/Analysis_Results/m140928_230547_42139_c100719602550000001823155305141591_s1_p0.2.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0002/Analysis_Results/m140928_230547_42139_c100719602550000001823155305141591_s1_p0.3.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0003/Analysis_Results/m140929_033247_42139_c100719602550000001823155305141592_s1_p0.1.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0003/Analysis_Results/m140929_033247_42139_c100719602550000001823155305141592_s1_p0.2.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0003/Analysis_Results/m140929_033247_42139_c100719602550000001823155305141592_s1_p0.3.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0004/Analysis_Results/m140929_075857_42139_c100719602550000001823155305141593_s1_p0.1.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0004/Analysis_Results/m140929_075857_42139_c100719602550000001823155305141593_s1_p0.2.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0004/Analysis_Results/m140929_075857_42139_c100719602550000001823155305141593_s1_p0.3.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0005/Analysis_Results/m140929_122654_42139_c100719602550000001823155305141594_s1_p0.1.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0005/Analysis_Results/m140929_122654_42139_c100719602550000001823155305141594_s1_p0.2.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0005/Analysis_Results/m140929_122654_42139_c100719602550000001823155305141594_s1_p0.3.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0001/Analysis_Results/m140928_191128_sidney_c100699772550000001823139903261590_s1_p0.1.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0001/Analysis_Results/m140928_191128_sidney_c100699772550000001823139903261590_s1_p0.2.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0001/Analysis_Results/m140928_191128_sidney_c100699772550000001823139903261590_s1_p0.3.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0002/Analysis_Results/m140928_233028_sidney_c100699772550000001823139903261591_s1_p0.1.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0002/Analysis_Results/m140928_233028_sidney_c100699772550000001823139903261591_s1_p0.2.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0002/Analysis_Results/m140928_233028_sidney_c100699772550000001823139903261591_s1_p0.3.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0003/Analysis_Results/m140929_034941_sidney_c100699772550000001823139903261592_s1_p0.1.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0003/Analysis_Results/m140929_034941_sidney_c100699772550000001823139903261592_s1_p0.2.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0003/Analysis_Results/m140929_034941_sidney_c100699772550000001823139903261592_s1_p0.3.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0004/Analysis_Results/m140929_080908_sidney_c100699772550000001823139903261593_s1_p0.1.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0004/Analysis_Results/m140929_080908_sidney_c100699772550000001823139903261593_s1_p0.2.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0004/Analysis_Results/m140929_080908_sidney_c100699772550000001823139903261593_s1_p0.3.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0005/Analysis_Results/m140929_122826_sidney_c100699772550000001823139903261594_s1_p0.1.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0005/Analysis_Results/m140929_122826_sidney_c100699772550000001823139903261594_s1_p0.2.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0005/Analysis_Results/m140929_122826_sidney_c100699772550000001823139903261594_s1_p0.3.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0006/Analysis_Results/m140929_164720_sidney_c100699772550000001823139903261595_s1_p0.1.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0006/Analysis_Results/m140929_164720_sidney_c100699772550000001823139903261595_s1_p0.2.subreads.fasta' \
	'http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0006/Analysis_Results/m140929_164720_sidney_c100699772550000001823139903261595_s1_p0.3.subreads.fasta' \
	> "$prefix.files"

# download read file
# wget streams into gzip, and a pipeline only reports the status of its last
# command, so wget's own exit status is relayed out through file descriptor 3.
# The archive is written under a temporary name and renamed only when both
# commands succeed; otherwise a truncated $prefix.fa.gz would pass the -f test
# on the next run and be used as if it were complete.
if [ ! -f "$prefix.fa.gz" ]; then
	part=$prefix.fa.gz.part
	wget_rc=$(
		{
			{
				wget -O- -qi "$prefix.files"
				echo "$?" >&3
			} | gzip -1 > "$part"
		} 3>&1
	)
	gzip_rc=$?
	if [ "$gzip_rc" -ne 0 ] || [ "$wget_rc" != 0 ]; then
		echo "error: downloading the reads listed in $prefix.files failed (wget status: ${wget_rc:-unknown}, gzip status: $gzip_rc)" >&2
		rm -f -- "$part"
		exit 1
	fi
	mv -- "$part" "$prefix.fa.gz" || exit 1
fi

# Install minimap and miniasm (requiring gcc and zlib)
# Each repository is cloned only when its directory is absent: git refuses to
# clone into an existing non-empty directory, which on a re-run would also skip
# the build. The build itself always runs, and the script stops if a clone or
# build fails rather than carrying on without the binaries invoked below.
for tool in minimap miniasm; do
	if [ ! -d "$tool" ]; then
		git clone "https://github.com/lh3/$tool" || exit 1
	fi
	(cd "$tool" && make) || exit 1
done

# Overlap (shorter N50 without -I6G)
# The pipeline's exit status is gzip's alone, so minimap's status is relayed
# out through file descriptor 3. Without that check a crashed or missing
# minimap would leave a truncated $prefix.paf.gz behind for the layout step.
minimap_rc=$(
	{
		{
			minimap/minimap -Sw5 -L100 -m0 -t8 -I6G "$prefix.fa.gz" "$prefix.fa.gz" 2> "$prefix.paf.gz.log"
			echo "$?" >&3
		} | gzip -1 > "$prefix.paf.gz"
	} 3>&1
)
gzip_rc=$?
if [ "$gzip_rc" -ne 0 ] || [ "$minimap_rc" != 0 ]; then
	echo "error: overlap step failed (minimap status: ${minimap_rc:-unknown}, gzip status: $gzip_rc); see $prefix.paf.gz.log" >&2
	exit 1
fi

# Layout
# miniasm writes the assembly graph to $prefix.gfa and its diagnostics to
# $prefix.gfa.log. Stop here when it fails so that the FASTA conversion never
# runs on a missing or truncated graph.
if ! miniasm/miniasm -f "$prefix.fa.gz" "$prefix.paf.gz" > "$prefix.gfa" 2> "$prefix.gfa.log"; then
	echo "error: layout step failed; see $prefix.gfa.log" >&2
	exit 1
fi

# Convert to FASTA
# Every GFA line that begins with "S" becomes a two-line record: ">" followed
# by the line's second field, then its third field on the next line.
awk '$0 ~ /^S/ { printf(">%s\n%s\n", $2, $3) }' "$prefix.gfa" > "$prefix.utg.fa"