File: mk_ft_longest_common.rb

package info (click to toggle)
genometools 1.6.6%2Bds-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 50,576 kB
  • sloc: ansic: 271,876; ruby: 29,930; python: 5,106; sh: 3,083; makefile: 1,213; perl: 219; pascal: 159; haskell: 37; sed: 5
file content (258 lines) | stat: -rwxr-xr-x 7,593 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
#!/usr/bin/env ruby

# Build the C expression that reads one character through a cursor variable.
#
# mode      - access mode name; "bytes", "twobit" and "encseq" are handled
#             explicitly, every other value falls through to the
#             gt_sequenceobject_esr_get form.
# structvar - name of the C sequence-object variable (e.g. "useq").
# pre       - prefix of the cursor variable; the cursor is "#{pre}ptr".
#
# Returns the C expression as a String (multi-line for "twobit"/"encseq").
def gen_access_raw(mode,structvar,pre)
  cursor = "#{pre}ptr"
  case mode
  when "bytes"
    "*#{cursor}"
  when "twobit"
    pad = " " * 30
    ["gt_twobitencoding_char_at_pos(",
     "#{pad}#{structvar}->twobitencoding,",
     "#{pad}#{cursor})"].join("\n")
  when "encseq"
    pad = " " * 20
    ["gt_encseq_get_encoded_char(#{structvar}->encseq,",
     "#{pad}#{cursor},",
     "#{pad}GT_READMODE_FORWARD)"].join("\n")
  else
    "gt_sequenceobject_esr_get(#{structvar},#{cursor})"
  end
end

# Build the C "if (...) break" statement (without the trailing semicolon)
# that ends the loop when the current characters of useq and vseq differ.
#
# a_mode, b_mode - access modes passed to gen_access_raw for useq / vseq.
# wildcard       - when truthy, the useq character is first stored in a
#                  local "cu" and a GT_WILDCARD value also ends the loop.
# complement     - when truthy, the vseq character is wrapped in
#                  GT_COMPLEMENTBASE(...) before the comparison.
#
# Returns the generated C code as a String.
def gen_compare(a_mode,b_mode,wildcard,complement)
  read_u = gen_access_raw(a_mode,"useq","u")
  read_v = gen_access_raw(b_mode,"vseq","v")
  # A vseq accessor longer than 10 characters is moved onto its own line.
  glue = read_v.length > 10 ? "\n" + " " * 12 : " "
  rhs = complement ? "GT_COMPLEMENTBASE(#{read_v})" : read_v
  tail = "#{rhs})\n" + " " * 10 + "break"
  if wildcard
    "const GtUchar cu = #{read_u};\n" + " " * 8 +
      "if (cu == GT_WILDCARD ||#{glue}cu !=#{glue}#{tail}"
  else
    "if (#{read_u} !=#{glue}#{tail}"
  end
end

# Name suffix for a generated function: "_wildcard" when wildcard is
# truthy, the empty string otherwise.
def gen_suffix(wildcard)
  wildcard ? "_wildcard" : ""
end

# Compose the name of the generated C function for one combination of
# access modes: "ft_longest_common_<a_mode>_<b_mode>" followed by the
# suffix produced by gen_suffix(wildcard).
def gen_func_name(a_mode,b_mode,wildcard)
  format("ft_longest_common_%s_%s%s", a_mode, b_mode, gen_suffix(wildcard))
end

# Build the right-hand side of the C assignment that initialises the cursor
# "#{pre}ptr" (the caller supplies "#{pre}ptr = " and the final ";").
#
# mode       - access mode name. "encseq_reader" yields just "#{pre}start";
#              "bytes" additionally adds the bytesequenceptr base pointer.
# pre        - variable prefix ("u" or "v" in this script).
# left2right - truthy: the start is added to the offset and the step is 1;
#              falsy: the start is subtracted and the step is -1.
#
# Returns a String; for every mode except "encseq_reader" it also contains
# a second statement assigning "#{pre}step".
def gen_ptr_assign(mode,pre,left2right)
  return "#{pre}start" if mode == "encseq_reader"

  sign, step = left2right ? ["+", "1"] : ["-", "-1"]
  base = mode == "bytes" ? "#{pre}seq->bytesequenceptr + " : ""
  "#{base}#{pre}seq->offset #{sign} #{pre}start; #{pre}step = #{step}"
end

# Build the C if/else statement that initialises the cursor "#{pre}ptr"
# depending on "#{pre}seq->read_seq_left2right", using gen_ptr_assign for
# both directions.
#
# Returns a String in which every line, including the first, is preceded by
# a newline and four spaces.
def gen_ptr_set(mode,pre)
  forward = gen_ptr_assign(mode,pre,true)
  backward = gen_ptr_assign(mode,pre,false)
  c_lines = ["if (#{pre}seq->read_seq_left2right)",
             "{",
             "  #{pre}ptr = #{forward};",
             "} else",
             "{",
             "  #{pre}ptr = #{backward};",
             "}"]
  c_lines.map { |line| "\n    #{line}" }.join
end

# Build the C code that declares minsubstringlength as
# "<other>start + <pre>seq->substringlength - <pre>start" and then caps it
# at "<other>seq->substringlength", where <other> is "v" when pre is "u"
# and "u" for any other pre.
#
# Returns a String in which every line, including the first, is preceded by
# a newline and four spaces.
def gen_minsub(pre)
  other = pre == "u" ? "v" : "u"
  c_lines = [
    "GtUword minsubstringlength = #{other}start + #{pre}seq->substringlength - #{pre}start;",
    "if (#{other}seq->substringlength < minsubstringlength)",
    "{",
    "  minsubstringlength = #{other}seq->substringlength;",
    "}"
  ]
  c_lines.map { |line| "\n    #{line}" }.join
end

# Generate the C code that opens the body of a generated function: the
# declarations of the uptr/vptr cursors (and their step variables where a
# cursor is not an "encseq_reader" one), the computation of
# minsubstringlength, and the cursor initialisation from gen_ptr_set.
#
# a_mode, b_mode - access mode names for useq and vseq.
#
# Three shapes are produced:
# * a_mode is "encseq_reader": uptr starts at ustart and the bound comes
#   from gen_minsub("v"); vptr is set up via gen_ptr_set unless b_mode is
#   "encseq_reader" as well.
# * only b_mode is "encseq_reader": the mirror image, with gen_minsub("u").
# * neither is "encseq_reader": a separate matchlength counter is declared
#   and the bound is the smaller of the two remaining substring lengths.
#
# Returns the generated C code as a String.
def gen_minsubstringlength_decl(a_mode,b_mode)
  if a_mode == "encseq_reader"
    if b_mode == "encseq_reader"
      return "GtUword uptr = ustart, vptr = vstart;" +
             gen_minsub("v")
    end
    decl = if b_mode == "bytes"
             "GtUword uptr = ustart; const GtUchar *vptr; int vstep;"
           else
             "GtUword uptr = ustart, vptr; int vstep;"
           end
    return decl + gen_minsub("v") + gen_ptr_set(b_mode,"v")
  end

  if b_mode == "encseq_reader"
    decl = if a_mode == "bytes"
             "const GtUchar *uptr; int ustep; GtUword vptr = vstart;"
           else
             "GtUword uptr; int ustep; GtUword vptr = vstart;"
           end
    return decl + gen_minsub("u") + gen_ptr_set(a_mode,"u")
  end

  # Neither side is an encseq_reader: both cursors carry a step variable
  # and the loop counts matches in matchlength.
  minmatch_decl = ["    GtUword minsubstringlength = useq->substringlength - ustart,",
                   "matchlength = 0;",
                   "if (vseq->substringlength - vstart < minsubstringlength)",
                   "{",
                   "  minsubstringlength = vseq->substringlength - vstart;",
                   "}"].join("\n    ") +
                  gen_ptr_set(a_mode,"u") +
                  gen_ptr_set(b_mode,"v")

  # A "bytes" cursor is a GtUchar pointer, any other cursor is a GtUword.
  decl = if a_mode == "bytes"
           if b_mode == "bytes"
             "const GtUchar *uptr, *vptr; int ustep, vstep;\n"
           else
             "const GtUchar *uptr; GtUword vptr; int ustep, vstep;\n"
           end
         elsif b_mode == "bytes"
           "GtUword uptr; const GtUchar *vptr; int ustep, vstep;\n"
         else
           "GtUword uptr, vptr; int ustep, vstep;\n"
         end
  decl + minmatch_decl
end

# Build the C statement (without ";") that advances the cursor
# "#{pre}ptr": a plain "++" for mode "encseq_reader", otherwise an addition
# of the "#{pre}step" variable.
def gen_ptr_incr(mode,pre)
  mode == "encseq_reader" ? "#{pre}ptr++" : "#{pre}ptr += #{pre}step"
end

# Build the C statement that increments matchlength. It is empty as soon as
# one of the two modes is "encseq_reader", and "matchlength++;" otherwise.
def gen_matchlength_inc(a_mode,b_mode)
  [a_mode,b_mode].include?("encseq_reader") ? "" : "matchlength++;"
end

# Name of the C variable that the generated loop condition compares with
# minsubstringlength: "uptr" if a_mode is "encseq_reader", else "vptr" if
# b_mode is "encseq_reader", else "matchlength".
def gen_smaller(a_mode,b_mode)
  return "uptr" if a_mode == "encseq_reader"
  return "vptr" if b_mode == "encseq_reader"

  "matchlength"
end

# C expression returned by the generated function: the distance the
# "encseq_reader" cursor has moved from its start (useq is checked before
# vseq), or the matchlength counter when neither mode is "encseq_reader".
def gen_return_matchlength(a_mode,b_mode)
  # The hash keeps insertion order, so "u" is examined before "v".
  reader_side = { "u" => a_mode, "v" => b_mode }.find do |_prefix, mode|
    mode == "encseq_reader"
  end
  return "matchlength" unless reader_side

  prefix = reader_side.first
  "#{prefix}ptr - #{prefix}start"
end

# Print to standard output the complete C definition of one generated
# function for the given combination of access modes.
#
# a_mode, b_mode - access mode names for useq and vseq.
# wildcard       - forwarded to gen_func_name and gen_compare.
#
# The emitted function returns 0 unless both start positions are below the
# substringlength of their sequence; otherwise it runs one of two do/while
# loops, chosen at C run time by vseq->dir_is_complement.
def longestcommonfunc(a_mode,b_mode,wildcard)
  func_name = gen_func_name(a_mode,b_mode,wildcard)
  setup = gen_minsubstringlength_decl(a_mode,b_mode)
  advance_u = gen_ptr_incr(a_mode,"u")
  advance_v = gen_ptr_incr(b_mode,"v")
  count_inc = gen_matchlength_inc(a_mode,b_mode)
  bounded = gen_smaller(a_mode,b_mode)
  result = gen_return_matchlength(a_mode,b_mode)

  # Both loops are identical apart from the complement flag passed to
  # gen_compare, so the text is produced by one helper.
  scan_loop = lambda do |complement|
    ["      do",
     "      {",
     "        #{gen_compare(a_mode,b_mode,wildcard,complement)};",
     "        #{advance_u};",
     "        #{advance_v};#{count_inc}",
     "      } while (#{bounded} < minsubstringlength);"].join("\n")
  end

  c_source = <<EOF
static GtUword #{func_name}(
                                      GtFtSequenceObject *useq,
                                      GtUword ustart,
                                      GtFtSequenceObject *vseq,
                                      const GtUword vstart)
{
  if (ustart < useq->substringlength && vstart < vseq->substringlength)
  {
    #{setup}
    if (vseq->dir_is_complement)
    {
#{scan_loop.call(true)}
    } else
    {
#{scan_loop.call(false)}
    }
    return #{result};
  }
  return 0;
}
EOF
  puts c_source
end

# Emit one C function for every (wildcard, a_mode, b_mode) combination,
# the non-wildcard variants first, separated by an empty line, and remember
# the generated names in emission order for the table printed afterwards.
modes = ["twobit","encseq_reader","encseq","bytes"]
func_list = []
[false,true].each do |wildcard|
  modes.product(modes).each do |a_mode,b_mode|
    # No separator before the very first function.
    puts "" unless func_list.empty?
    longestcommonfunc(a_mode,b_mode,wildcard)
    func_list << gen_func_name(a_mode,b_mode,wildcard)
  end
end

# Emit the C table of all generated functions, each entry annotated with
# its index, followed by two constants: the number of modes and the index
# of the first function whose name contains "_wildcard".
table_rows = func_list.each_with_index.map do |func_name,idx|
  "  /* #{idx} */ #{func_name}"
end
firstwildcard = func_list.index { |func_name| func_name =~ /_wildcard/ }

puts "\nGtLongestCommonFunc ft_longest_common_func_tab[] =\n{"
print table_rows.join(",\n")
puts "\n};"
puts "const int ft_longest_common_num_modes = #{modes.length};"
puts "const int ft_longest_common_func_first_wildcard = #{firstwildcard};"