File: simpleindex.pl

package info (click to toggle)
libsearch-xapian-perl 1.2.25.5-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 848 kB
  • sloc: perl: 2,332; makefile: 6
file content (70 lines) | stat: -rwxr-xr-x 1,907 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/perl
#
# Index each paragraph of a text file as a Xapian document.
#
# Copyright (C) 2003 James Aylett
# Copyright (C) 2004,2007,2009 Olly Betts
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
# USA

use 5.006;
use strict;
use warnings;

use Search::Xapian (':all');

# Exactly one command-line argument is expected: the path of the database
# to index into.  Anything else gets the usage message and a failure status.
unless (@ARGV == 1) {
    print STDERR "Usage: $0 PATH_TO_DATABASE\n";
    exit(1);
}

# Read text from STDIN and index each paragraph (a run of non-blank lines)
# as a separate Xapian document in the database named by $ARGV[0].  Anything
# thrown while opening or updating the database is caught by the eval and
# reported on STDERR below.
eval {
    # Open the database for update, creating a new database if necessary.
    my $database = Search::Xapian::WritableDatabase->new($ARGV[0], DB_CREATE_OR_OPEN);

    my $indexer = Search::Xapian::TermGenerator->new();
    my $stemmer = Search::Xapian::Stem->new("english");
    $indexer->set_stemmer($stemmer);

    my $para = '';
    while (my $line = <STDIN>) {
        # Strip leading and trailing whitespace (including the newline).
        $line =~ s/\s+$//;
        $line =~ s/^\s+//;
        if ($line eq '') {
            # A blank line ends the current paragraph, if there is one.
            if ($para ne '') {
                index_paragraph($database, $indexer, $para);
                $para = '';
            }
        } else {
            # Join the lines of a paragraph with single spaces.
            $para .= ' ' if $para ne '';
            $para .= $line;
        }
    }

    # Input which doesn't end with a blank line leaves a final paragraph
    # pending here; index it too rather than silently dropping it.
    index_paragraph($database, $indexer, $para) if $para ne '';
};
if ($@) {
    print STDERR "Exception: $@\n";
    exit(1);
}

# Add $text to $database as a new document: the text is stored as the
# document data and its terms are generated by $indexer.
sub index_paragraph {
    my ($database, $indexer, $text) = @_;

    my $doc = Search::Xapian::Document->new();
    $doc->set_data($text);

    $indexer->set_document($doc);
    $indexer->index_text($text);

    # Add the document to the database.
    $database->add_document($doc);
    return;
}