File: br_bioflat.rb

package info (click to toggle)
bioruby 1.4.0-2
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 6,328 kB
  • ctags: 7,787
  • sloc: ruby: 61,539; xml: 3,383; makefile: 58; sh: 4
file content (293 lines) | stat: -rwxr-xr-x 7,344 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
#!/usr/bin/env ruby
# 
# = bioflat - OBDA flat file indexer (executable)
# 
# Copyright::   Copyright (C) 2002
#               Naohisa Goto <ng@bioruby.org>
# License::     The Ruby License
#
# $Id: br_bioflat.rb,v 1.17 2007/04/05 23:35:39 trevor Exp $ 
# 

require 'bio'

# Prints the command line help for this script to standard output.
#
# The script name shown in the examples is taken from $0.  This method only
# prints; it neither exits nor raises, so callers decide what happens next.
def usage
  print <<EOM
Search:
  #{$0} [--search] [options...] [DIR/]DBNAME KEYWORDS
or
  #{$0} [--search] --location DIR --dbname DBNAME [options...] KEYWORDS

Search options:
  --namespace NAME       set search namespace to NAME
  (or --name NAME)         You can set this option many times to specify
                           more than one namespace.

Create index:
  #{$0} --create --location DIR --dbname DBNAME [--format <genbank|embl|fasta>] [options...] [--files] FILES
Update index:
  #{$0} --update --location DIR --dbname DBNAME [options...] [--files] FILES

Create index options:
  --primary=UNIQUE       set primary namespace to UNIQUE
                           Default primary/secondary namespaces depend on
                           each format of flatfiles.
  --secondary=KEY        set secondary namespaces.
                           You may use this option many times to specify
                           more than one namespace.
  --add-secondary=KEY    add secondary namespaces to default specification.
                           You can use this option many times.

Options only valid for --create (or --update) --type flat:
  --sort=/path/to/sort   use external sort program (e.g. /usr/bin/sort)
  --sort=BUILTIN         use builtin sort routine
                         (default: /usr/bin/sort or BUILTIN)
  --env=/path/to/env     use env program to run sort (default: /usr/bin/env)
  --env-arg=XXXXXX       argument given to the env program (default: LC_ALL=C)
                         (multiple --env-arg=XXXXXX can be specified)

Options only valid for --update:
  --renew                re-read all flatfiles and update whole index

Backward compatibility:
  --makeindex DIR/DBNAME
      same as --create --type flat --location DIR --dbname DBNAME
  --makeindexBDB DIR/DBNAME
      same as --create --type bdb  --location DIR --dbname DBNAME
  --format=CLASS
      instead of genbank|embl|fasta, specifying a class name is allowed

Show namespaces:
  #{$0} --show-namespaces [--location DIR --dbname DBNAME] [DIR/DBNAME]
or
  #{$0} --show-namespaces [--format=CLASS]
or
  #{$0} --show-namespaces --files file

EOM

end


# Creates or updates a flat file index according to the arguments in ARGV.
#
# ARGV[0] selects how the database path is obtained:
# * --makeindexBDB DIR/DBNAME :: BDB index, path taken from ARGV[1]
# * --makeindex DIR/DBNAME    :: flat index, path taken from ARGV[1]
# * --create / --update       :: path built from --location and --dbname
#
# Leading arguments that start with "-" are consumed as options; whatever
# remains (or everything after --files) is passed on as the flatfile names.
#
# mode:: :update calls Bio::FlatFileIndex.update_index; any other value
#        (default :create) calls Bio::FlatFileIndex.makeindex.
#
# Returns the result of the indexer call.  Prints the usage text and returns
# without indexing when ARGV[0] is not recognized, when --type is neither
# "bdb" nor "flat", or when the database path cannot be determined.
def do_index(mode = :create)
  dbpath = nil
  dbname = nil
  location = nil
  format = nil
  is_bdb = nil

  case ARGV[0]
  when /^\-\-?make.*bdb/i
    # Must be tested before the generic "make" pattern below, which also
    # matches --makeindexBDB and would otherwise hide this branch.
    dbpath = ARGV[1]
    args = ARGV[2..-1] || []
    is_bdb = Bio::FlatFileIndex::MAGIC_BDB
  when /^\-\-?make/
    dbpath = ARGV[1]
    args = ARGV[2..-1] || []
  when /^\-\-create/, /^\-\-update/
    args = ARGV[1..-1] || []
  else
    usage
    return
  end

  options = {}

  while args.first =~ /^\-/
    case opt = args.shift

    # OBDA stuff

    when /^\-\-?format$/
      # "--format NAME" (separate word): the value is read and deliberately
      # dropped, so the format is left to auto detection.
      args.shift
      format = nil
    when /^\-\-?location/
      location = args.shift
      # a missing value stays nil and is reported below instead of crashing
      location = location.chomp('/') if location
    when /^\-\-?dbname/
      dbname = args.shift
    when /^\-\-?(index)?type/
      case args.shift
      when /bdb/
        is_bdb = Bio::FlatFileIndex::MAGIC_BDB
      when /flat/
        is_bdb = nil
      else
        # unknown or missing index type: do not build anything
        usage
        return
      end

    # BioRuby extension

    when /^\-\-?files/i
      # everything after --files is a flatfile name, even if it starts with "-"
      break

    when /^\-\-?format\=(.*)/i
      format = $1

    when /^\-\-?sort\=(.*)/i
      options['sort_program'] = $1
      options['onmemory'] = nil
    when /^\-\-?no\-?te?mp/i
      options['onmemory'] = true

    when /^\-\-?env\=(.*)/i
      options['env_program'] = $1

    when /^\-\-?env-arg(?:ument)?\=(.*)/i
      value = $1
      options['env_program_arguments'] ||= []
      options['env_program_arguments'].push value

    when /^\-\-?primary.*\=(.*)/i
      options['primary_namespace'] = $1

    when /^\-\-?add-secondary.*\=(.*)/i
      value = $1
      # the key is created even when the value is empty
      options['additional_secondary_namespaces'] ||= []
      options['additional_secondary_namespaces'] << value unless value.empty?

    when /^\-\-?secondary.*\=(.*)/i
      value = $1
      options['secondary_namespaces'] ||= []
      options['secondary_namespaces'] << value unless value.empty?

    when /^\-\-?renew/
      options['renew'] = true

    else
      $stderr.print "Warning: ignoring invalid option #{opt.inspect}\n"
    end
  end

  unless dbpath
    if location.nil? || dbname.nil?
      $stderr.print "ERROR: database path is not specified " \
                    "(give --location DIR and --dbname DBNAME)\n"
      usage
      return
    end
    dbpath = File.join(location, dbname)
  end

  if mode == :update then
    Bio::FlatFileIndex::update_index(dbpath, format, options, *args)
  else
    Bio::FlatFileIndex::makeindex(is_bdb, dbpath, format, options, *args)
  end
end


# Looks up every keyword given on the command line in a flat file index and
# prints the matching entries to standard output.
#
# Leading ARGV items are consumed as options (--search, --location DIR,
# --dbname DBNAME, --namespace NAME / --name=NAME); parsing stops at the
# first argument that is not one of them.  If --dbname was not given, the
# next argument is taken as the database name.  All remaining arguments are
# search keywords.  When namespaces were given, the search is restricted to
# them via include_in_namespaces?; otherwise include? is used.
#
# Progress and errors are written to standard error.  A RuntimeError raised
# during one lookup is reported and the next keyword is tried.  The database
# is closed even when another exception aborts the search.  Prints the usage
# text and returns when no database name is available.
def do_search
  dbname = nil
  location = nil
  names = []
  while x = ARGV.shift
    case x
    when /\A\-\-?search/i
      #do nothing
    when /\A\-\-?location/i
      location = ARGV.shift.to_s
      # Strip one trailing slash, but keep a bare "/" so that a database in
      # the root directory is not mistaken for "no location given".
      location = location.chomp('/') unless location == '/'
    when /\A\-\-?dbname/i
      dbname = ARGV.shift
    when /\A\-\-?name(?:space)?(?:\=(.+))?/i
      if $1 then
        names << $1
      elsif value = ARGV.shift
        names << value
      end
    else
      # first non-option argument: put it back and stop parsing
      ARGV.unshift x
      break
    end
  end

  dbname = ARGV.shift unless dbname
  unless dbname
    $stderr.print "ERROR: database name is not specified\n"
    usage
    return
  end
  dbname = File.join(location, dbname) unless location.to_s.empty?

  db = Bio::FlatFileIndex.open(dbname)
  begin
    ARGV.each do |key|
      $stderr.print "Searching for \'#{key}\'...\n"
      begin
        if names.empty? then
          r = db.include?(key)
        else
          r = db.include_in_namespaces?(key, *names)
        end
      rescue RuntimeError
        $stderr.print "ERROR: #{$!}\n"
        next
      end
      r ||= []   # a falsy result means "nothing found"
      $stderr.print "OK, #{r.size} entry found\n"
      r.each do |i|
        print db.search_primary(i)
      end
    end
  ensure
    db.close
  end
end


# Prints the namespaces of a flatfile format or of an existing index to
# standard output, primary namespace first.
#
# Leading ARGV items are consumed as options (--show-namespaces,
# --location DIR, --dbname DBNAME, --format CLASS / --format=CLASS,
# --files FILE...).  The source of the namespace list is chosen as follows:
# * --files:  the format is auto-detected from the first file that
#   Bio::FlatFile.autodetect_file recognizes; an error is reported and
#   nothing is listed if none is recognized.
# * --format: the namespaces come from the indexer's parser for that format.
# * otherwise: the index database (--dbname, or the next argument) is opened
#   and its own namespaces are listed.
#
# Informational messages go to standard error.  Prints the usage text and
# returns when a database is needed but no name is available.
def do_show_namespaces
  dbname = nil
  location = nil
  files = nil
  format = nil
  while x = ARGV.shift
    case x
    when /\A\-\-?(show\-)?name(space)?s/i
      #do nothing
    when /\A\-\-?location/i
      location = ARGV.shift.to_s
      # Strip one trailing slash, but keep a bare "/" so that a database in
      # the root directory is not mistaken for "no location given".
      location = location.chomp('/') unless location == '/'
    when /\A\-\-?dbname/i
      dbname = ARGV.shift
    when /\A\-\-?format(?:\=(.+))?/i
      if $1 then
        format = $1
      elsif value = ARGV.shift
        format = value
      end
    when /\A\-\-?files/i
      # everything that is left on the command line is a flatfile name
      files = ARGV
      break
    else
      # first non-option argument: put it back and stop parsing
      ARGV.unshift x
      break
    end
  end

  if files then
    detected = nil
    files.each do |file|
      detected = Bio::FlatFile.autodetect_file(file)
      break if detected
    end
    unless detected
      $stderr.print "ERROR: couldn't determine file format\n"
      return
    end
    $stderr.print "Format: #{detected}\n"
    format = detected
  end

  unless format
    # No format known: the namespaces must be read from an index database.
    dbname = ARGV.shift unless dbname
    unless dbname
      $stderr.print "ERROR: database name is not specified\n"
      usage
      return
    end
    dbname = File.join(location, dbname) unless location.to_s.empty?
  end

  $stderr.print "Namespaces: (first line: primary namespace)\n"
  if format then
    parser = Bio::FlatFileIndex::Indexer::Parser.new(format)
    print parser.primary.name, "\n"
    puts parser.secondary.keys
  else
    db = Bio::FlatFileIndex.open(dbname)
    begin
      puts db.namespaces
    ensure
      db.close
    end
  end
end

# Command line dispatch.  With fewer than two arguments only the usage text
# is shown; otherwise the first argument selects the action.
if ARGV.size <= 1
  usage
else
  command = ARGV[0]
  if command =~ /--make/ || command =~ /--create/
    # indexing commands switch on Bio::FlatFileIndex::DEBUG.out before running
    Bio::FlatFileIndex::DEBUG.out = true
    do_index
  elsif command =~ /--update/
    Bio::FlatFileIndex::DEBUG.out = true
    do_index(:update)
  elsif command =~ /\A\-\-?(show\-)?name(space)?s/i
    do_show_namespaces
  else
    # an explicit --search and any unrecognized first argument both search
    do_search
  end
end