'''
Filters a given GIIRA GTF file for all genes with sufficient support.

Copyright (c) 2013,
Franziska Zickmann,
ZickmannF@rki.de, Robert Koch-Institute, Berlin, Germany
Distributed under the GNU Lesser General Public License, version 3.0
'''
import sys
import numpy
# Number of command-line arguments required after the script name:
# GTF path, output path and the three y/n filter switches.
REQUIRED_ARG_COUNT = 5

# Index of the attribute column in a tab-separated GTF record.
ATTRIBUTE_COLUMN = 8

# Position of each support tag, counted from the END of the ";"-separated
# attribute column.
# NOTE(review): these offsets assume the three support tags are the last
# entries of the column and that the column ends with a trailing ";"
# (which yields one empty final entry) -- confirm against real GIIRA output.
AMBI_TAG_OFFSET = 4             # "alsoUniqueSupport"
COVERAGE_TAG_OFFSET = 3         # "coverageSupport"
UNIQUE_COVERAGE_TAG_OFFSET = 2  # "hasEnoughUniques"

# Single copy of the help text; the example matches the sentence below it
# (ambiguity and coverage filters on, unique-coverage filter off).
USAGE_LINES = (
    "Script to filter a given GIIRA GTF file for all genes with sufficient support.",
    "Usage: python filterGenes.py [Path_TO_GTF] [PATH_OUTFILE] [WANT_AMBI_FILTER: (y/n)] [WANT_COVERAGE_FILTER: (y/n)] [WANT_UNIQUE_COVERAGE_FILTER: (y/n)]",
    'AMBI-FILTER = tag "alsoUniqueSupport" in GIIRA output',
    'COVERAGE-FILTER = tag "coverageSupport" in GIIRA output',
    'UNIQUE_COVERAGE-FILTER = tag "hasEnoughUniques" in GIIRA output',
    "Example: python filterGenes.py myGenes.gtf myGenes_filtered.gtf y y n",
    "This applies a filtering for genes only supported by ambiguous reads or lacking sufficient overall coverage.",
)


def print_usage():
    """Print the command-line help text to standard output."""
    for text in USAGE_LINES:
        # Single-argument call form behaves the same on Python 2 and 3.
        print(text)


def tag_says_no(tags, offset):
    """Return True if the tag `offset` entries from the end contains ": n".

    A tag list shorter than `offset` has no such entry and therefore cannot
    reject the line; this avoids the index wrapping around to an unrelated
    tag (or raising) on short attribute columns.
    """
    if offset > len(tags):
        return False
    return ": n" in tags[-offset]


def wants_line(line, want_multi_filter, want_cov_filter, want_unique_cov_filter):
    """Decide whether one GTF line passes the enabled filters.

    Each `want_*` argument is the raw command-line switch; a filter is
    enabled when the switch contains the letter "y". An enabled filter
    rejects the line when its tag contains ": n".

    Lines without an attribute column (blank lines, comments, headers)
    carry no support tags, so no filter can reject them and they are kept.
    """
    fields = line.rstrip().split("\t")
    if len(fields) <= ATTRIBUTE_COLUMN:
        return True

    tags = fields[ATTRIBUTE_COLUMN].split(";")
    checks = (
        (want_multi_filter, AMBI_TAG_OFFSET),
        (want_cov_filter, COVERAGE_TAG_OFFSET),
        (want_unique_cov_filter, UNIQUE_COVERAGE_TAG_OFFSET),
    )
    for switch, offset in checks:
        if "y" in switch and tag_says_no(tags, offset):
            return False
    return True


def filter_lines(lines, want_multi_filter, want_cov_filter, want_unique_cov_filter):
    """Yield, unchanged and in order, every line that passes the filters."""
    for line in lines:
        if wants_line(line, want_multi_filter, want_cov_filter,
                      want_unique_cov_filter):
            yield line


def main(argv):
    """Run the filter for the given argument vector and return the exit status.

    `argv` has the layout of `sys.argv`: script name, GTF path, output path
    and the three y/n switches. Returns 1 after printing the usage text when
    help is requested or arguments are missing (same status as before),
    and 0 after a successful run.
    """
    args = argv[1:]
    # Help is checked first and by exact match, so "-h" works on its own and
    # an input path that merely contains "-h" is not mistaken for it.
    help_requested = bool(args) and args[0] in ("-h", "--help")
    # All five arguments are required; checking this before touching the
    # fifth one turns a four-argument call into a usage message.
    if help_requested or len(args) < REQUIRED_ARG_COUNT:
        print_usage()
        return 1

    (path_to_gtf, path_out_file, want_multi_filter,
     want_cov_filter, want_unique_cov_filter) = args[:REQUIRED_ARG_COUNT]

    # Context managers close both files even if a line fails to process.
    with open(path_to_gtf, 'r') as infile, open(path_out_file, 'w') as outfile:
        for kept in filter_lines(infile, want_multi_filter, want_cov_filter,
                                 want_unique_cov_filter):
            outfile.write(kept)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))