File: gc-parse.rb

package info (click to toggle)
genometools 1.6.6%2Bds-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 50,576 kB
  • sloc: ansic: 271,876; ruby: 29,930; python: 5,106; sh: 3,083; makefile: 1,213; perl: 219; pascal: 159; haskell: 37; sed: 5
file content (58 lines) | stat: -rwxr-xr-x 1,410 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env ruby

# Record for one parsed genetic code entry:
#   name  - text captured from a `name "..."` line
#   idnum - integer captured from an `id N` line
#   aa    - text captured from an `ncbieaa  "..."` line
#   start - text captured from an `sncbieaa "..."` line
GC_code = Struct.new(
  "GC_code",
  :name,
  :idnum,
  :aa,
  :start
)

# Renders one genetic code record as a brace-enclosed C initializer.
#
# gc must respond to name, idnum, aa and start. The four fields are emitted
# in that order, separated by a comma and a newline, with continuation lines
# indented by three spaces; name, aa and start are wrapped in double quotes.
#
# Returns the formatted String (no trailing newline).
def gc_code_pretty(gc)
  template = ['  {"%s"', '(unsigned int) %s', '"%s"', '"%s"}'].join(",\n   ")
  format(template, gc.name, gc.idnum, gc.aa, gc.start)
end

# Renders one row of the translation-number lookup table.
#
# idx_map - Hash from translation number to table position
# idx     - translation number to look up
#
# Returns the mapped position followed by a "U" suffix when idx is a key of
# idx_map, otherwise the GT_UNDEFTRANSNUM placeholder; both are indented by
# two spaces.
def transnum_idx_pretty(idx_map, idx)
  return "  GT_UNDEFTRANSNUM" unless idx_map.key?(idx)

  "  #{idx_map[idx]}U"
end

# Main script.
#
# Reads standard input line by line. Everything up to and including the line
# that starts with "Genetic-code-table" is skipped. After that marker:
#   * a `name "..."` line whose value does not contain SGC<digit> starts a new
#     entry (the previous one, if any, is stored);
#   * `id N`, `ncbieaa  "..."` and `sncbieaa "..."` lines fill in the entry
#     that is currently open.
# Finally two C array definitions are printed to standard output: the list of
# entries (schemetable) and a lookup table from id to position in that list
# (transnum2index), with GT_UNDEFTRANSNUM for ids that do not occur.
#
# NOTE(review): input is matched one line at a time, so a quoted name that
# wraps onto a following line is not recognised and the fields after it are
# attached to the previously opened entry - confirm the input never wraps.
start_parse = false
current = nil
codelist = []
idx_map = {}

# Aborts with a readable message when a field line shows up before any entry
# has been opened (this used to surface as a NoMethodError on nil).
require_entry = lambda do |entry, lineno|
  if entry.nil?
    abort("#{$PROGRAM_NAME}: line #{lineno}: field found before any code name")
  end
end

STDIN.each_line.with_index(1) do |line, lineno|
  unless start_parse
    start_parse = true if line.match(/^Genetic-code-table/)
    next
  end

  if (m = line.match(/name \"([^\"]*)\"/))
    # Names containing SGC<digit> are alternative labels of the entry that is
    # already open, so they neither close it nor start a new one.
    unless m[1].match(/SGC[0-9]/)
      codelist.push(current) unless current.nil?
      current = GC_code.new(m[1], nil, nil, nil)
    end
  elsif (m = line.match(/id (\d+)/))
    require_entry.call(current, lineno)
    current.idnum = m[1].to_i
    # The open entry has not been pushed yet, so the current list length is
    # exactly the position it will occupy in schemetable.
    idx_map[current.idnum] = codelist.size
  elsif (m = line.match(/sncbieaa \"([^\"]*)\"/))
    # Checked before ncbieaa: "sncbieaa" contains "ncbieaa" as a substring.
    require_entry.call(current, lineno)
    current.start = m[1]
  elsif (m = line.match(/ncbieaa  \"([^\"]*)\"/))
    require_entry.call(current, lineno)
    current.aa = m[1]
  end
end
codelist.push(current) unless current.nil?

# Without at least one entry carrying an id neither table can be generated.
if codelist.empty? || idx_map.empty?
  abort("#{$PROGRAM_NAME}: no genetic code entries with an id found on standard input")
end

# Size the lookup table by the largest id, not the last one read, so that no
# id is cut off when the ids are not in ascending order.
maxnum = idx_map.keys.max

puts "static GtTranslationScheme schemetable[] = {"
puts codelist.map { |gc| gc_code_pretty(gc) }.join(",\n")
puts "};"

puts "\nstatic unsigned int transnum2index[] =\n{"
index_rows = (0..maxnum).map { |transnum| transnum_idx_pretty(idx_map, transnum) }
puts index_rows.join(",\n")
puts "};"