File: cmp-seex-bench.rb

package info (click to toggle)
genometools 1.6.6+ds-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 50,576 kB
  • sloc: ansic: 271,876; ruby: 29,930; python: 5,106; sh: 3,083; makefile: 1,213; perl: 219; pascal: 159; haskell: 37; sed: 5
file content (55 lines) | stat: -rwxr-xr-x 1,361 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/env ruby

# Exactly three command line arguments are required; otherwise print the
# usage line to STDERR and terminate with exit code 1.
unless ARGV.length == 3
  STDERR.puts "Usage: #{$0} <startseedlength> <endseedlength> <multiple fasta file>"
  exit 1
end

# The first two arguments are the seed length bounds (converted with to_i),
# the third one is the name of the input file and is kept as given.
startseedlength, endseedlength = ARGV.first(2).map { |arg| arg.to_i }
inputfile = ARGV.last

# Runs cmp-seex.rb once for every seed length in the inclusive range
# startseedlength..endseedlength and yields each line of its standard
# output that does not start with '#'.
#
# startseedlength:: first seed length to evaluate
# endseedlength::   last seed length to evaluate (inclusive)
# inputfile::       value handed to cmp-seex.rb via --inputfile
#
# If a run does not terminate successfully, a FAILURE message is written to
# STDERR and the script terminates with exit code 1.
def run_evaluations(startseedlength,endseedlength,inputfile)
  startseedlength.upto(endseedlength) do |seedlength|
    # Build the argument vector directly instead of splitting a command
    # string on whitespace, so that an input file name containing blanks
    # is passed on as a single argument.
    argv = ["cmp-seex.rb", "--silent", "--inputfile", inputfile.to_s,
            "--seedlength", seedlength.to_s,
            "--minid", "90", "--maxalilendiff", "30"]
    # The block form closes the pipe and waits for the child process when
    # the block ends; only then does $? describe the command just run.
    IO.popen(argv) do |pipe|
      pipe.each_line do |line|
        yield line unless line.start_with?("#")
      end
    end
    status = $?
    # A missing status or a termination by signal is treated as a failure.
    unless status && status.success?
      STDERR.puts "FAILURE: #{argv.join(' ')}: \"#{status}\""
      exit 1
    end
  end
end

# Increments the counter stored in dist under the key value; a key that is
# not yet present starts at 1. Returns the updated count.
def add_value(dist,value)
  dist[value] = dist.fetch(value, 0) + 1
end

# Reads the tab separated file filename and prints three distributions.
#
# For every line the fields at index 3, 4 and 5 are converted with to_i and
# counted in three separate hashes (a missing field counts as 0, because
# nil.to_i is 0). Afterwards each hash is printed to standard output in
# ascending key order, one "key<TAB>count" line per entry, first dist_both,
# then dist_greedy_only, then dist_xdrop_only.
#
# filename:: path of the file to read
def accumulate(filename)
  dist_both = {}
  dist_greedy_only = {}
  dist_xdrop_only = {}
  # File.foreach closes the file after the last line has been read.
  File.foreach(filename) do |line|
    values = line.split(/\t/)
    add_value(dist_both,values[3].to_i)
    add_value(dist_greedy_only,values[4].to_i)
    add_value(dist_xdrop_only,values[5].to_i)
  end
  [dist_both,dist_greedy_only,dist_xdrop_only].each do |h|
    h.sort.each do |k,v|
      puts "#{k}\t#{v}"
    end
  end
end

# Print every result line delivered by the evaluation runs to standard output.
run_evaluations(startseedlength, endseedlength, inputfile) { |result_line| puts result_line }