1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
|
#!/usr/bin/ruby
require 'optparse'
require 'ostruct'
# Walks the directory tree rooted at +directory+ and yields the path of every
# regular file in it. Paths are built as "<parent>/<entry>", so they start
# with +directory+ exactly as it was passed in.
#
# The walk is iterative: directories still to be scanned are kept on an
# explicit stack instead of recursing. Entries are reported in whatever order
# Dir.foreach delivers them.
#
# File.file? and File.directory? follow symbolic links, so a link to a file is
# reported and a link to a directory is descended into.
#
# directory - path of the directory to start from.
#
# Yields the path (String) of each regular file found.
def listdirectory(directory)
  pending = [directory]
  until pending.empty?
    current = pending.pop
    Dir.foreach(current) do |entry|
      # "." and ".." would send the walk back into itself or its parent
      next if entry == "." || entry == ".."

      path = "#{current}/#{entry}"
      if File.file?(path)
        yield path
      elsif File.directory?(path)
        pending.push(path)
      end
      # Anything else (dangling symlink, FIFO, socket, device) is skipped:
      # it is neither a file to report nor a directory that can be scanned.
    end
  end
end
# Yields every file below +dirname+ whose name ends in one of the accepted
# sequence-file suffixes and whose basename is not in +excludelist+.
#
# Accepted suffixes are always .fasta, .fna, .fa, .fsa and .FASTA; .fastq is
# added when +includefastq+ is true, and .fsa.gz / .FASTA.gz when
# +includegzip+ is true.
#
# dirname      - directory to search (handed to listdirectory).
# excludelist  - collection of basenames to leave out; only member? is
#                called on it.
# includefastq - truthy to accept .fastq files as well.
# includegzip  - truthy to accept .fsa.gz and .FASTA.gz files as well.
#
# Yields the path (String) of each selected file, as produced by
# listdirectory.
def listselected(dirname,excludelist,includefastq,includegzip)
  suffixes = ["fasta","fna","fa","fsa","FASTA"]
  suffixes.push("fastq") if includefastq
  suffixes.push("fsa.gz", "FASTA.gz") if includegzip

  # Compare literally against the end of the name. Interpolating the suffix
  # into a regexp would let the "." in "fsa.gz" match any character and would
  # anchor at end of line rather than end of string.
  endings = suffixes.map { |suffix| ".#{suffix}" }

  listdirectory(dirname) do |filename|
    next unless filename.end_with?(*endings)
    next if excludelist.member?(File.basename(filename))

    yield filename
  end
end
# Parses the command-line arguments in +argv+ and returns the resulting
# settings.
#
# argv - Array of argument strings (typically ARGV).
#
# Returns an OpenStruct with these fields:
#   withgttestdata - true unless -n/--no-gttestdata was given
#   excludelist    - Array of basenames collected from every
#                    -e/--excludelist (comma-separated) occurrence
#   includefastq   - true unless -q/--no-fastq was given
#   includegzip    - true unless -g/--no-gzip was given
#
# -h/--help prints the option summary to standard output and exits with
# status 0. Any non-option argument left over after parsing prints the usage
# banner to standard error and exits with status 1. Errors raised by
# OptionParser#parse (e.g. for an unknown option) are not rescued here.
def parseargs(argv)
  options = OpenStruct.new
  options.withgttestdata = true
  options.excludelist = []
  options.includefastq = true
  options.includegzip = true

  opts = OptionParser.new
  # One banner shared by --help and the leftover-argument error below.
  opts.banner = "Usage: #{$0} [options]"
  opts.on("-n","--no-gttestdata","exclude gttestdata") do
    options.withgttestdata = false
  end
  opts.on("-e","--excludelist STRING",
          "list of files (basenames) to exclude") do |x|
    options.excludelist.concat(x.split(/,/))
  end
  opts.on("-q","--no-fastq",
          "exclude files ending with .fastq") do
    options.includefastq = false
  end
  opts.on("-g","--no-gzip",
          "exclude files ending with .gz") do
    options.includegzip = false
  end
  opts.on( '-h', '--help', 'Display this screen' ) do
    # opts.to_s already starts with the banner, so print it only once
    puts opts
    exit 0
  end

  rest = opts.parse(argv)
  unless rest.empty?
    # The banner belongs to the parser, not to the OpenStruct of settings
    # (which has no such field and would print an empty line).
    $stderr.puts opts.banner
    exit 1
  end
  options
end
# Script body: print the path of every selected sequence file, one per line.
options = parseargs(ARGV)

# Basenames that are always left out of the $GTDIR/testdata listing, followed
# by whatever was passed on the command line via -e/--excludelist.
testdata_exclude = %w[
  solid_color_reads.fastq
  test2_wrong_begin.fastq
  test9_uneven_length.fastq
  test7_empty_seq.fastq
  test6_premature_end.fastq
  test4_different_seqlengths.fastq
  test3_different_seqnames.fastq
  corruptpatternfile.fna
  TTT-small-wrongchar.fna
  sw100K1.fsa
  sw100K2.fsa
] + options.excludelist

# Shared block for both listings: write each selected path to standard output.
print_path = proc { |filename| puts filename }

# First listing: the testdata directory below $GTDIR, if that variable is set.
gtdir = ENV["GTDIR"]
unless gtdir.nil?
  testdata_dir = "#{gtdir}/testdata"
  listselected(testdata_dir, testdata_exclude,
               options.includefastq, options.includegzip, &print_path)
end

# Second listing: $GTTESTDATA, unless it was switched off with -n or the
# variable is not set. The command-line exclude list is not applied here.
gttestdata_dir = ENV["GTTESTDATA"]
if options.withgttestdata && !gttestdata_dir.nil?
  gttestdata_exclude = ["trembl-section.fsa.gz"]
  listselected(gttestdata_dir, gttestdata_exclude,
               options.includefastq, options.includegzip, &print_path)
end
|