File: simpleindex.rb

package info (click to toggle)
xapian-bindings 1.4.29-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 21,436 kB
  • sloc: cpp: 379,853; python: 10,780; cs: 9,529; java: 6,949; sh: 4,629; perl: 4,435; makefile: 1,274; ruby: 1,028; php: 586; tcl: 246
file content (60 lines) | stat: -rwxr-xr-x 1,803 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/env ruby
#
# Index each paragraph of a text file as a Xapian document.
#
# Originally by Paul Legato (plegato@nks.net), 4/22/06
# Based on Python's simpleindex.py
# Copyright (C) 2006 Networked Knowledge Systems, Inc.
# Copyright (C) 2007 Olly Betts
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
# USA

require 'xapian'

# Exactly one command-line argument is expected: the path of the database.
# Anything else prints a usage message to stderr and exits with status 99.
unless ARGV.length == 1
  $stderr.puts "Usage: #{$0} PATH_TO_DATABASE"
  exit 99
end

# Open the database named on the command line for writing; the
# DB_CREATE_OR_OPEN flag creates it first if it does not exist yet.
database = Xapian::WritableDatabase.new(ARGV.first, Xapian::DB_CREATE_OR_OPEN)

# Term generator used to index each paragraph, configured with an
# English stemmer.
english_stemmer = Xapian::Stem.new("english")
indexer = Xapian::TermGenerator.new
indexer.stemmer = english_stemmer

# Read standard input line by line and group it into paragraphs.  Each line
# is stripped of surrounding whitespace; consecutive non-blank lines are
# joined with single spaces to form one paragraph.  A blank line -- or the
# end of the input -- terminates the current paragraph, which is then stored
# as one Xapian document.

# Stripped lines of the paragraph currently being collected.
pending_lines = []

# Index the collected paragraph (if any) as a new document and start afresh.
# Does nothing when no lines are pending, so runs of blank lines are harmless.
index_pending = lambda do
  return if pending_lines.empty?

  para = pending_lines.join(' ')
  doc = Xapian::Document.new
  doc.data = para

  indexer.document = doc
  indexer.index_text(para)

  # Add the document to the database
  database.add_document(doc)
  pending_lines.clear
end

$stdin.each_line do |raw_line|
  line = raw_line.strip
  if line.empty?
    # We've reached the end of a paragraph, so index it.
    index_pending.call
  else
    pending_lines << line
  end
end

# End of input also ends the last paragraph.  Without this, a final
# paragraph that is not followed by a blank line would never be indexed.
index_pending.call

# Commit the added documents explicitly instead of relying on them being
# written out implicitly when the database object is destroyed.
database.commit