File: cmp_db_query_exch.rb

package info (click to toggle)
genometools 1.6.1+ds-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 50,412 kB
  • sloc: ansic: 271,241; ruby: 30,339; python: 4,880; sh: 3,193; makefile: 1,194; perl: 219; pascal: 159; haskell: 37; sed: 5
file content (72 lines) | stat: -rwxr-xr-x 1,592 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/env ruby

# Opens +filename+ for reading and returns the resulting File object.
#
# filename:: path of the file to open.
#
# Returns the open File; the caller is responsible for reading and closing it.
#
# If the file cannot be opened (any StandardError raised by File.new), a
# message is written to STDERR and the script terminates with exit status 1.
# Returning nil here instead would only postpone the failure to the caller's
# first method call on the result and bury the real cause under a
# NoMethodError.
def openfile(filename)
  File.new(filename, "r")
rescue => err
  STDERR.puts "#{$0}: cannot open #{filename}: #{err}"
  exit 1
end

# Converts a single text field into a number where that is unambiguous.
#
# v:: the field as a String.
#
# Returns
# * an Integer when +v+ consists solely of decimal digits,
# * a Float when +v+ is a plain decimal number containing exactly one dot
#   ("98.50", "5." and ".5" all qualify),
# * +v+ itself, unchanged, in every other case.
#
# Both patterns are anchored to the whole string (\A ... \z) rather than to a
# single line. Fields that merely consist of digits and dots, such as "1.2.3"
# or ".", are left as strings on purpose: String#to_f would silently truncate
# them (to 1.2 and 0.0), which could make two different fields compare equal.
def conv_to_i(v)
  if v =~ /\A[0-9]+\z/
    v.to_i
  elsif v =~ /\A(?:[0-9]+\.[0-9]*|\.[0-9]+)\z/
    v.to_f
  else
    v
  end
end

# Turns raw input lines into rows of converted fields.
#
# lines:: an Array of Strings, one per input line.
#
# Lines that begin with '#' are dropped. Each remaining line is split at
# every single whitespace character (so consecutive or leading whitespace
# yields empty fields) and each field is passed through conv_to_i.
#
# Returns a new Array with one Array of fields per retained line.
def convertfileinput(lines)
  data_lines = lines.reject { |line| line.match(/^#/) }
  data_lines.map do |line|
    line.split(/\s/).map { |field| conv_to_i(field) }
  end
end

# Collects the elements of +val+ found at the positions listed in +idxlist+.
#
# val::     an indexable collection (anything responding to []).
# idxlist:: the positions to pick, in the order they should appear.
#
# Returns a new Array holding val[idx] for each idx of +idxlist+.
def extract(val, idxlist)
  idxlist.map { |idx| val[idx] }
end

# Two match files are mandatory; anything after them is the optional
# exception list. Print the usage line and stop when too few arguments are given.
if ARGV.length < 2
  STDERR.puts "Usage: #{$0} <matchfile1> <matchfile2> [exceptionlist]"
  exit 1
end

filename0, filename1 = ARGV[0], ARGV[1]

# Every argument after the two file names is read as an integer: the
# position of a row (0-based, counted after filtering and sorting) that is
# exempt from the comparison below.
exceptionlist = ARGV.drop(2).map { |arg| arg.to_i }

# Read both files, drop duplicate lines and convert them to rows of fields.
lines0 = convertfileinput(openfile(filename0).readlines.uniq)
lines1 = convertfileinput(openfile(filename1).readlines.uniq)

# Sort each file by its own column order so that corresponding rows end up
# at the same position in both arrays.
sortkey0 = [1,2,5,6,9]
sortkey1 = [5,6,1,2,9]
lines0.sort! {|x,y| extract(x,sortkey0) <=> extract(y,sortkey0)}
lines1.sort! {|x,y| extract(x,sortkey1) <=> extract(y,sortkey1)}

# column_map[j] is the column of a row from the first file that has to equal
# column j of the row at the same position in the second file: columns 0-2
# and 4-6 trade places, columns 3, 7, 8 and 9 stay where they are.
column_map = [4,5,6,3,0,1,2,7,8,9]

# Only the positions present in both files can be compared row by row.
shared = [lines0.length,lines1.length].min
shared.times do |linenum|
  next if exceptionlist.member?(linenum)
  row0 = lines0[linenum]
  row1 = lines1[linenum]
  column_map.each_with_index do |col0,col1|
    if row0[col0] != row1[col1]
      STDERR.puts "#{row0}\n#{row1}\nin line #{linenum} do not match"
      exit 1
    end
  end
end

# The row-by-row comparison only covers positions present in both files, so a
# differing number of rows has to be reported separately.
if lines0.length != lines1.length
  STDERR.puts "#{$0}: files have length #{lines0.length} and #{lines1.length}"
  exit 1
end