File: biofetch.rb

package info (click to toggle)
ruby-bio 2.0.6-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 7,108 kB
  • sloc: ruby: 68,331; perl: 13; makefile: 11; sh: 1
file content (572 lines) | stat: -rwxr-xr-x 14,137 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
#!/usr/bin/env ruby
# coding: utf-8
#
# biofetch.rb : BioFetch server (interface to TogoWS)
#
#   Copyright (C) 2002-2004 KATAYAMA Toshiaki <k@bioruby.org>
#                 2013      GOTO Naohisa <ng@bioruby.org>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#

require 'cgi'
require 'erb'
require 'open-uri'
require 'fileutils'
require 'tempfile'

MAX_ID_NUM = 50

# script name
SCRIPT_NAME = File.basename(__FILE__)

# full URL for this CGI
BASE_URL = "http://bioruby.org/cgi-bin/#{SCRIPT_NAME}"

# cache directory for metadata
# Note: The cache is only for metadata (database list and format list).
# Data entries are NOT cached.
CACHE_DIR = '/tmp/biofetch_rb.cache'

# cache lifetime
CACHE_LIFETIME = 60 * 60 # 1 hour

module TogoWS
  TOGOWS_URL = 'http://togows.dbcls.jp/'

  def togows_database_complete_list
    result = togows_get_cached('/entry/')
    result.to_s.split(/\n/).collect {|x| x.split(/\t/) }
  end

  def togows_database_formats(db)
    db = CGI.escape(db)
    result = togows_get_cached("/entry/#{db}/?formats")
  end

  def togows_get(path)
    uristr = TOGOWS_URL + path
    begin
      result = OpenURI.open_uri(uristr).read
    rescue OpenURI::HTTPError
      result = nil
    end
    result
  end

  private

  def togows_get_cached(path)
    filepath = path.sub(/\A\//, '').sub(/\/\z/, '')
    filepath = filepath.gsub(/\//, " ")
    filepath = filepath.sub(/\?/, '_')
    filepath = File.join(CACHE_DIR, filepath)
    result = nil
    begin
      if Time.now - File.mtime(filepath) > CACHE_LIFETIME
        # delete expired cache file
        File.delete(filepath)
      end
      result = File.read(filepath)
    rescue IOError, SystemCallError
      result = nil
    end
    unless result then
      # valid cache is not found
      result = togows_get(path)
      if result then
        # create cache directory if not found
        FileUtils.mkdir_p(CACHE_DIR, :mode => 0700)
        # simple security check for the cache dir
        if File.stat(CACHE_DIR).mode & 0022 != 0 then
          raise SecurityError, "CACHE_DIR #{CACHE_DIR} is writeable by others"
        end

        # write to temporary file
        tmp = Tempfile.open('temp', CACHE_DIR)
        tmp.print result
        tmp.close
        # create a hard link from the temporary to the cache file
        begin
          File.link(tmp.path, filepath)
        rescue IOError, SystemCallError
        end
        # the temporay file will be automatically removed at exit
      end
    end
    result
  end

end #module TogoWS

module BioFetchError

  def print_text_page(str)
    print "Content-type: text/plain; charset=UTF-8\n\n"
    puts str
    exit
  end

  def print_html_page(str)
    print "Content-type: text/html; charset=UTF-8\n\n"
    print "<pre>", CGI.escapeHTML(str), "</pre>\n"
    exit
  end

  def error1(db)
    db = CGI.escapeHTML(db.to_s) # to avoid potential XSS with old IE
    str = "ERROR 1 Unknown database [#{db}]."
    print_text_page(str)
  end

  def error2(style)
    style = CGI.escapeHTML(style.to_s) # to avoid potential XSS with old IE
    str = "ERROR 2 Unknown style [#{style}]."
    print_text_page(str)
  end

  def error3(format, db)
    # to avoid potential XSS with old IE which ignores Content-Type
    db = CGI.escapeHTML(db.to_s)
    format = CGI.escapeHTML(format.to_s)
    str = "ERROR 3 Format [#{format}] not known for database [#{db}]."
    print_text_page(str)
  end

  def error4(entry_id, db)
    # to avoid potential XSS with old IE which ignores Content-Type
    entry_id = CGI.escapeHTML(entry_id.to_s)
    db = CGI.escapeHTML(db.to_s)
    str = "ERROR 4 ID [#{entry_id}] not found in database [#{db}]."
    print_text_page(str)
  end

  def error5(count)
    # to avoid potential XSS with old IE which ignores Content-Type
    count = CGI.escapeHTML(count.to_s)
    str = "ERROR 5 Too many IDs [#{count}]. Max [#{MAX_ID_NUM}] allowed."
    print_text_page(str)
  end

  def error6(info)
    # to avoid potential XSS with old IE which ignores Content-Type
    count = CGI.escapeHTML(info.to_s)
    str = "ERROR 6 Illegal information request [#{info}]."
    print_text_page(str)
  end

end



module ApiBridge

  include BioFetchError
  include TogoWS

  def list_databases_with_synonyms
    togows_database_complete_list
  end

  def list_databases
    list_databases_with_synonyms.flatten
  end

  def bget(db, id_list, format)
    case format
    when 'fasta'
      format = '.fasta'
    else
      format = ''
    end
    db = CGI.escape(db)
    
    results = ''
    id_list.each do |query_id|
      query_id = CGI.escape(query_id)
      path = "/entry/#{db}/#{query_id}#{format}"
      result = togows_get(path)

      if !result or result.empty? or /\AError\: / =~ result then
        error4(query_id, db)
      else
        results << result
      end
    end
    return results
  end

  def check_fasta_ok?(db)
    result = togows_database_formats(db)
    /^fasta$/ =~ result.to_s
  end

end #module ApiBridge

module BioFetchCheck

  include ApiBridge

  private

  def check_style(style)
    style = style.to_s.downcase
    error2(style) unless /\A(html|raw)\z/.match(style)
    style
  end

  def check_format(format, db)
    fmt = format ? format.to_s.downcase : nil
    case fmt
    when 'fasta'
      db = check_dbname(db)
      fmt = nil unless check_fasta_ok?(db)
    when 'default'
      # do nothing
    when nil
      fmt = 'default'
    else
      fmt = nil
    end

    error3(format, db) unless fmt
    fmt
  end

  def check_number_of_id(num)
    error5(num) if num > MAX_ID_NUM
  end

  def check_dbname(db)
    db = db.to_s.downcase
    error1(db) unless list_databases.include?(db)
    db
  end

end #module BioFetchCheck

class BioFetch

  include BioFetchCheck
  include BioFetchError
  include ApiBridge

  def initialize(db, id_list, style, format)
    style = check_style(style)
    format = check_format(format, db)
    check_number_of_id(id_list.length)
    db = check_dbname(db)

    entries = bget(db, id_list, format)

    if style == 'html' then
      print_html_page(entries)
    else
      print_text_page(entries)
    end

  end

end #class BioFetch



class BioFetchInfo

  include BioFetchCheck
  include BioFetchError
  include ApiBridge

  def initialize(info, db)
    @db = db

    begin
      check_info(info) ? __send__(info) : raise
    rescue
      error6(info)
    end
  end

  private

  def check_info(meth_name)
    /\A(dbs|formats|maxids)\z/ =~ meth_name
  end

  def dbs
    str = list_databases.sort.join(' ')
    print_text_page(str)
  end

  def formats
    db = check_dbname(@db)
    fasta = " fasta" if check_fasta_ok?(db)
    str = "default#{fasta}"
    print_text_page(str)
  end

  def maxids
    str = MAX_ID_NUM.to_s
    print_text_page(str)
  end

end #class BioFetchInfo



class BioFetchCGI

  include ApiBridge

  def initialize(cgi)
    @cgi = cgi
    show_page
  end

  private

  def show_page
    if info.empty?
      if id_list.empty?
        show_query_page
      else
        show_result_page(db, id_list, style, format)
      end
    else
      show_info_page(info, db)
    end
  end

  def show_query_page
    html = ERB.new(DATA.read)
    max_id_num = MAX_ID_NUM
    databases_with_synonyms = list_databases_with_synonyms
    databases = list_databases
    script_name = SCRIPT_NAME
    base_url = BASE_URL
    @cgi.out({ "type" => "text/html", "charset" => "utf-8" }) do
      html.result(binding)
    end
  end

  def show_result_page(db, id_list, style, format)
    BioFetch.new(db, id_list, style, format)
  end

  def show_info_page(info, db)
    BioFetchInfo.new(info, db)
  end

  def info
    @cgi['info'].downcase
  end

  def db
    @cgi['db'].downcase
  end

  def id_list
    @cgi['id'].strip.split(/[\,\s]+/)
  end

  def style
    s = @cgi['style'].downcase
    return s.empty? ? "html" : s
  end

  def format
    f = @cgi['format'].downcase
    return f.empty? ? "default" : f
  end

end



BioFetchCGI.new(CGI.new)



=begin

This program was created during BioHackathon 2002, Tucson and updated
in Cape Town :)

Rewrited in 2013 to use TogoWS API as the bioruby.org server left from The
University of Tokyo and the old SOAP-based KEGG API is discontinued.

=end


__END__

<HTML>
<HEAD>
  <LINK href="http://bioruby.org/img/favicon.png" rel="icon" type="image/png">
  <LINK href="http://bioruby.org/css/bioruby.css" rel="stylesheet" type="text/css">
  <TITLE>BioFetch interface to TogoWS</TITLE>
</HEAD>

<BODY bgcolor="#ffffff">

<H1>
<IMG src="http://bioruby.org/img/ruby.png" align="middle">
BioFetch interface to
<A href="http://togows.dbcls.jp/">TogoWS</A>
</H1>

<P>This page allows you to retrieve up to <%= max_id_num %> entries at a time from various up-to-date biological databases.</P>

<HR>

<FORM METHOD="post" ENCTYPE="application/x-www-form-urlencoded" action="<%= script_name %>">

<SELECT name="db">
<% databases_with_synonyms.each do |dbs|
     a = dbs[1..-1]
     synonyms = unless a.empty? then
                  " (abbr: " + a.join(", ") + ")"
                else
                  ""
                end
%>
<OPTION value="<%= dbs[0] %>"><%= dbs[0] %><%= synonyms %></OPTION>
<% end %>
</SELECT>

<INPUT name="id" size="40" type="text" maxlength="1000">

<SELECT name="format">
<OPTION value="default">Default</OPTION>
<OPTION value="fasta">Fasta</OPTION>
</SELECT>

<SELECT name="style">
<OPTION value="raw">Raw</OPTION>
<OPTION value="html">HTML</OPTION>
</SELECT>

<INPUT type="submit">

</FORM>

<HR>

<H2>Direct access</H2>

<P><%= base_url %>?format=(default|fasta|...);style=(html|raw);db=(nuccore|embl|...);id=ID[,ID,ID,...]</P>
<P>(NOTE: the option separator ';' can be '&')</P>

<DL>
  <DT> <U>format</U> (optional)
  <DD> default|fasta|...

  <DT> <U>style</U> (required)
  <DD> html|raw

  <DT> <U>db</U> (required)
  <DD> <%= databases.join('|') %>

  <DT> <U>id</U> (required)
  <DD> comma separated list of IDs
</DL>

<P>See the <A href="http://obda.open-bio.org/">BioFetch specification</A> for more details.</P>

<H2>Server informations</H2>

<DL>
  <DT> <A href="?info=dbs">What databases are available?</A>
  <DD> <%= base_url %>?info=dbs

  <DT> <A href="?info=formats;db=embl">What formats does the database X have?</A>
  <DD> <%= base_url %>?info=formats;db=embl

  <DT> <A href="?info=maxids">How many entries can be retrieved simultaneously?</A>
  <DD> <%= base_url %>?info=maxids
</DL>

<H2>Examples</H2>

<DL>
  <DT> <A href="?format=default;style=raw;db=nuccore;id=AJ617376">nuccore/AJ617376</A> (default/raw)
  <DD> <%= base_url %>?format=default;style=raw;db=nuccore;id=AJ617376

  <DT> <A href="?format=fasta;style=raw;db=nuccore;id=AJ617376">nuccore/AJ617376</A> (fasta/raw)
  <DD> <%= base_url %>?format=fasta;style=raw;db=nuccore;id=AJ617376

  <DT> <A href="?format=default;style=html;db=nuccore;id=AJ617376">nuccore/AJ617376</A> (default/html)
  <DD> <%= base_url %>?format=default;style=html;db=nuccore;id=AJ617376

  <DT> <A href="?format=default;style=raw;db=nuccore;id=AJ617376,AJ617377">nuccore/AJ617376,AJ617377</A> (default/raw, multiple)
  <DD> <%= base_url %>?format=default;style=raw;db=nuccore;id=AJ617376,AJ617377

  <DT> <A href="?format=default;style=raw;db=embl;id=J00231">embl/J00231</A> (default/raw)
  <DD> <%= base_url %>?format=default;style=raw;db=embl;id=J00231

  <DT> <A href="?format=default;style=raw;db=uniprot;id=CYC_BOVIN">uniprot/CYC_BOVIN</A> (default/raw)
  <DD> <%= base_url %>?format=default;style=raw;db=uniprot;id=CYC_BOVIN

  <DT> <A href="?format=fasta;style=raw;db=uniprot;id=CYC_BOVIN">uniprot/CYC_BOVIN</A> (fasta/raw)
  <DD> <%= base_url %>?format=fasta;style=raw;db=uniprot;id=CYC_BOVIN

  <DT> <A href="?format=default;style=raw;db=genes;id=eco%3Ab0015">genes/eco:b0015</A> (default/raw)
  <DD> <%= base_url %>?format=default;style=raw;db=genes;id=eco%3Ab0015
  <DD> <%= base_url %>?format=default;style=raw;db=genes;id=eco:b0015

</DL>

<H2>Errors</H2>

<DL>
  <DT> <A href="?format=default;style=raw;db=nonexistent;id=AJ617376">Error1</A> sample : DB not found
  <DD> <%= base_url %>?format=default;style=raw;db=nonexistent;id=AJ617376

  <DT> <A href="?format=default;style=nonexistent;db=nuccore;id=AJ617376">Error2</A> sample : unknown style
  <DD> <%= base_url %>?format=default;style=nonexistent;db=nuccore;id=AJ617376

  <DT> <A href="?format=nonexistent;style=raw;db=nuccore;id=AJ617376">Error3</A> sample : unknown format
  <DD> <%= base_url %>?format=nonexistent;style=raw;db=nuccore;id=AJ617376

  <DT> <A href="?format=default;style=raw;db=nuccore;id=nonexistent">Error4</A> sample : ID not found
  <DD> <%= base_url %>?format=default;style=raw;db=nuccore;id=nonexistent

  <DT> <A href="?style=raw;db=genes;id=1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51">Error5</A> sample : too many IDs
  <DD> <%= base_url %>?style=raw;db=genes;id=1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51

  <DT> <A href="?info=nonexistent">Error6</A> sample : unknown info
  <DD> <%= base_url %>?info=nonexistent"
</DL>

<H2>Other BioFetch implementations</H2>

<UL>
  <LI> <A href="http://www.ebi.ac.uk/cgi-bin/dbfetch">dbfetch at EBI</A>
</UL>

<HR>

<DIV align=right>
<I>
staff@Bio<span class="ruby">Ruby</span>.org
</I>
<BR>
<BR>
<A href="http://bioruby.org/"><IMG border=0 src="/img/banner.gif"></A>
</DIV>

</BODY>
</HTML>