1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
|
#!/bin/bash
set -e
#Written by Brian Bushnell
#Last updated August 7, 2019
#Fetches the latest taxonomy dump from ncbi, and formats it for use with BBTools
#For other BBTools programs that use taxonomy, you can set "taxpath=X" where X is the location of the files generated by this script.
#Setting "taxpath=X tree=auto" will cause the program to look for the tax tree at location "X/tree.taxtree.gz".
wget -q -O - ftp://ftp.ncbi.nih.gov/pub/taxonomy/accession2taxid/dead_nucl.accession2taxid.gz | shrinkaccession.sh in=stdin.txt.gz out=shrunk.dead_nucl.accession2taxid.gz zl=9 t=4 &
wget -q -O - ftp://ftp.ncbi.nih.gov/pub/taxonomy/accession2taxid/dead_prot.accession2taxid.gz | shrinkaccession.sh in=stdin.txt.gz out=shrunk.dead_prot.accession2taxid.gz zl=9 t=6 &
wget -q -O - ftp://ftp.ncbi.nih.gov/pub/taxonomy/accession2taxid/dead_wgs.accession2taxid.gz | shrinkaccession.sh in=stdin.txt.gz out=shrunk.dead_wgs.accession2taxid.gz zl=9 t=6 &
wget -q -O - ftp://ftp.ncbi.nih.gov/pub/taxonomy/accession2taxid/nucl_gb.accession2taxid.gz | shrinkaccession.sh in=stdin.txt.gz out=shrunk.nucl_gb.accession2taxid.gz zl=9 t=8 &
wget -q -O - ftp://ftp.ncbi.nih.gov/pub/taxonomy/accession2taxid/nucl_wgs.accession2taxid.gz | shrinkaccession.sh in=stdin.txt.gz out=shrunk.nucl_wgs.accession2taxid.gz zl=9 t=10 &
wget -q -O - ftp://ftp.ncbi.nih.gov/pub/taxonomy/accession2taxid/pdb.accession2taxid.gz | shrinkaccession.sh in=stdin.txt.gz out=shrunk.pdb.accession2taxid.gz zl=9 t=4 &
wget -q -O - ftp://ftp.ncbi.nih.gov/pub/taxonomy/accession2taxid/prot.accession2taxid.gz | shrinkaccession.sh in=stdin.txt.gz out=shrunk.prot.accession2taxid.gz zl=9 t=10
wget -nv ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdmp.zip
unzip -o taxdmp.zip
time taxtree.sh names.dmp nodes.dmp merged.dmp tree.taxtree.gz -Xmx16g
time gitable.sh shrunk.dead_nucl.accession2taxid.gz,shrunk.dead_prot.accession2taxid.gz,shrunk.dead_wgs.accession2taxid.gz,shrunk.nucl_gb.accession2taxid.gz,shrunk.nucl_wgs.accession2taxid.gz,shrunk.pdb.accession2taxid.gz,shrunk.prot.accession2taxid.gz gitable.int1d.gz -Xmx24g
time analyzeaccession.sh shrunk.*.accession2taxid.gz out=patterns.txt
rm -f gi_*.dmp.gz
rm -f *.dmp
rm -f gc.prt
rm -f readme.txt
|