File: expression.rb

package info (click to toggle)
ruby-bio 2.0.6-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 7,108 kB
  • sloc: ruby: 68,331; perl: 13; makefile: 11; sh: 1
file content (155 lines) | stat: -rw-r--r-- 3,051 bytes parent folder | download | duplicates (10)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#
# = bio/db/kegg/expression.rb - KEGG EXPRESSION database class
#
# Copyright::	Copyright (C) 2001-2003, 2005
#		Shuichi Kawashima <shuichi@hgc.jp>,
#		Toshiaki Katayama <k@bioruby.org>
# License::	The Ruby License
#
# $Id: expression.rb,v 1.11 2007/04/05 23:35:41 trevor Exp $
#

require "bio/db"

module Bio
class KEGG

# Parser and simple statistics for one KEGG EXPRESSION entry.
#
# The entry is a tab-separated text.  For every data line the first field
# is used as the ORF name and, of the remaining fields, the four fields
# starting at the third one are converted to Float and stored as
# [v0, v1, v2, v3].  Throughout this class "control" means v0 - v1 and
# "target" means v2 - v3 (presumably signal minus background for each
# channel -- confirm against the KEGG EXPRESSION format description).
class EXPRESSION

  # Parses +entry+ (a String holding the whole entry).
  #
  # Lines starting with '#' and blank lines are ignored.  Lookups of an
  # unknown ORF in orf2val, orf2rgb and orf2ratio return '' (the default
  # value of the underlying hashes).
  def initialize(entry)
    @orf2val   = Hash.new('')
    @orf2rgb   = Hash.new('')
    @orf2ratio = Hash.new('')
    @max_intensity = 10000
    entry.split("\n").each do |line|
      # A blank line has no value columns at all; without this guard
      # fields[2, 4] would be nil and the parse would raise NoMethodError.
      next if /^#/ =~ line || line.strip.empty?
      fields = line.split("\t")
      orf = fields.shift
      @orf2val[orf] = fields[2, 4].collect { |x| x.to_f }
    end
  end

  # Hash of ORF name => [v0, v1, v2, v3] (Floats).
  attr_reader :orf2val
  # Hash of ORF name => "rrggbb" color string, filled by #val2rgb.
  attr_reader :orf2rgb
  # Hash of ORF name => log2(target / control), filled by #logy_minus_logx.
  attr_reader :orf2ratio
  # Intensity used by #val2rgb to derive the size of one color step.
  attr_reader :max_intensity

  # Mean of the control values (v0 - v1) over all ORFs.
  # Returns NaN when no ORF has been parsed (0.0 / 0).
  def control_avg
    total = @orf2val.values.inject(0.0) { |acc, v| acc + (v[0] - v[1]) }
    total / orf2val.size
  end

  # Mean of the target values (v2 - v3) over all ORFs.
  # Returns NaN when no ORF has been parsed (0.0 / 0).
  def target_avg
    total = @orf2val.values.inject(0.0) { |acc, v| acc + (v[2] - v[3]) }
    total / orf2val.size
  end

  # Population variance (divided by N, not N - 1) of the control values.
  def control_var
    avg = control_avg
    total = @orf2val.values.inject(0.0) do |acc, v|
      diff = (v[0] - v[1]) - avg
      acc + diff * diff
    end
    total / orf2val.size
  end

  # Population variance (divided by N, not N - 1) of the target values.
  def target_var
    avg = target_avg
    total = @orf2val.values.inject(0.0) do |acc, v|
      diff = (v[2] - v[3]) - avg
      acc + diff * diff
    end
    total / orf2val.size
  end

  # Standard deviation of the control values (square root of #control_var).
  def control_sd
    Math.sqrt(control_var)
  end

  # Standard deviation of the target values (square root of #target_var).
  def target_sd
    Math.sqrt(target_var)
  end

  # Returns [orf, ratio] pairs sorted by descending log2 ratio.
  #
  # Without +threshold+ the first +num+ pairs are returned.  With a
  # +threshold+, +num+ is ignored and only the pairs whose ratio is
  # strictly greater than +threshold+ are returned (possibly none).
  def up_regulated(num=20, threshold=nil)
    logy_minus_logx
    ary = @orf2ratio.to_a.sort { |a, b| b[1] <=> a[1] }
    return ary[0..num-1] if threshold.nil?
    # The list is sorted, so the matching pairs form a prefix.  take_while
    # stops at the first non-matching pair (without including it) and is
    # safe when every pair, or no pair at all, matches.
    ary.take_while { |pair| pair[1] > threshold }
  end

  # Returns [orf, ratio] pairs sorted by ascending log2 ratio.
  #
  # Without +threshold+ the first +num+ pairs are returned.  With a
  # +threshold+, +num+ is ignored and only the pairs whose ratio is
  # strictly less than +threshold+ are returned (possibly none).
  def down_regulated(num=20, threshold=nil)
    logy_minus_logx
    ary = @orf2ratio.to_a.sort { |a, b| a[1] <=> b[1] }
    return ary[0..num-1] if threshold.nil?
    ary.take_while { |pair| pair[1] < threshold }
  end

  # Returns [orf, ratio] pairs sorted by descending absolute log2 ratio.
  #
  # Without +threshold+ the first +num+ pairs are returned.  With a
  # +threshold+, +num+ is ignored and only the pairs whose absolute ratio
  # is strictly greater than +threshold+ are returned (possibly none).
  def regulated(num=20, threshold=nil)
    logy_minus_logx
    ary = @orf2ratio.to_a.sort { |a, b| b[1].abs <=> a[1].abs }
    return ary[0..num-1] if threshold.nil?
    ary.take_while { |pair| pair[1].abs > threshold }
  end

  # Fills orf2ratio with log2((v2 - v3) / (v0 - v1)) for every ORF.
  #
  # The logarithm is computed as a difference of base-10 logarithms scaled
  # by 1 / log10(2).  Math.log10 raises Math::DomainError for a negative
  # control or target value.
  def logy_minus_logx
    @orf2val.each do |k, v|
      @orf2ratio[k] = (1.0/Math.log10(2))*(Math.log10(v[2]-v[3]) - Math.log10(v[0]-v[1]))
    end
  end

  # Fills orf2rgb with an "rrggbb" string for every ORF: the target value
  # drives the red channel, the control value the green channel, and blue
  # is always "00".
  #
  # One color step is max_intensity / 255 (integer division).  Each channel
  # is clamped to 0..255; a negative value would otherwise be formatted by
  # "%02x" as a string such as "..f" instead of two hex digits.
  def val2rgb
    col_unit = @max_intensity/255
    @orf2val.each do |k, v|
      g = hex_channel(((v[0] - v[1])/col_unit).to_i)
      r = hex_channel(((v[2] - v[3])/col_unit).to_i)
      @orf2rgb[k] = r + g + "00"
    end
  end

  private

  # Formats +level+ as two lowercase hex digits after clamping it to 0..255.
  def hex_channel(level)
    format("%02x", [[level, 0].max, 255].min)
  end

end # class EXPRESSION

end # class KEGG
end # module Bio