#!/usr/bin/perl
#
# A sample search program which demonstrates many of Xapian's commonly used
# features.
#
# Copyright (C) 2009 Olly Betts
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
use 5.006;
use strict;
use warnings;
use Search::Xapian (':all');
# Value slot numbers: each name records which numbered value slot holds that
# field.  Slots are assigned consecutively, starting from zero.
my ($SLOT_DATE, $SLOT_DOCNUM, $SLOT_TYPE, $SLOT_AUTHOR, $SLOT_TITLE) = (0 .. 4);
# Fewer than three command line arguments cannot form a valid invocation, so
# print the usage summary on STDERR and exit with a failure status.
if (@ARGV < 3) {
    print STDERR
        "Usage: $0 PATH_TO_DATABASE[...] -- [-sSORTBY] QUERY[...]\n",
        "where SORTBY can be date, id, type, author, or title.\n",
        "\n",
        "Search syntax supported:\n",
        " Boolean filters: type:book id:tt0076759\n",
        " Date ranges: 25/12/1970..31/12/1979\n",
        " Boolean operators: AND OR NOT\n",
        qq{ Free text fields: author:John title:"star wars"\n},
        qq{ Phrases: "accidental death"\n};
    exit 1;
}
# Main search routine.  All the Xapian calls run inside this eval so that an
# exception raised while searching ends up in $@ for the check that follows
# the block.  Steps:
#   1. open every database named before the '--' separator;
#   2. read the optional -sSORTBY switch;
#   3. join the remaining arguments into one query string and parse it;
#   4. fetch and print up to ten matches.
eval {
    # Open the database(s) for searching.
    my $database = Search::Xapian::Database->new(shift @ARGV);
    while (scalar @ARGV && $ARGV[0] ne '--') {
        # Xapian can transparently search several databases together.
        my $extra_db = Search::Xapian::Database->new(shift @ARGV);
        $database->add_database($extra_db);
    }

    # If the loop above ran out of arguments, the '--' separator was never
    # given and every argument was taken as a database path.  Say so rather
    # than silently running an empty query.
    if (!@ARGV) {
        print STDERR "Missing '--' between the database path(s) and the query.\n";
        exit 1;
    }
    # Discard the '--' separator itself.
    shift @ARGV;

    # Map each SORTBY name onto the value slot it sorts on.
    my %slot_for = (
        date   => $SLOT_DATE,
        id     => $SLOT_DOCNUM,
        type   => $SLOT_TYPE,
        author => $SLOT_AUTHOR,
        title  => $SLOT_TITLE,
    );

    # Default is sort by relevance ($sort_by stays undefined).
    my $sort_by;
    if (scalar @ARGV && $ARGV[0] =~ /^-s(\w+)$/) {
        # Copy the capture straight away; $1 is overwritten by the next
        # successful match.
        my $sort_key = $1;
        if (!exists $slot_for{$sort_key}) {
            print STDERR "Bad option '-s$sort_key'.\n";
            exit 1;
        }
        $sort_by = $slot_for{$sort_key};
        shift @ARGV;
    }

    # Start an enquire session.
    my $enquire = Search::Xapian::Enquire->new($database);

    # Combine the remaining command line arguments with a space between each.
    # This means that simple queries without shell metacharacters in don't
    # have to be quoted just to appear as a single argument to the shell.
    my $query_string = join ' ', @ARGV;

    # Set up the QueryParser how we want.
    my $qp = Search::Xapian::QueryParser->new();
    $qp->set_database($database);
    $qp->set_stemmer(Search::Xapian::Stem->new("english"));
    $qp->set_stemming_strategy(STEM_SOME);

    # Prefixes for free-text fields.
    $qp->add_prefix('title', 'S');
    $qp->add_prefix('author', 'A');

    # Prefixes for boolean filters.
    $qp->add_boolean_prefix('type', 'XTYPE');
    $qp->add_boolean_prefix('id', 'Q');

    # Second argument of 1 means "prefer mm/dd/yyyy".
    # Third argument (1920) is the epoch year for two digit years: years < 20
    # are 20xx; years >= 20 are 19xx.
    my $vrpdate = Search::Xapian::DateValueRangeProcessor->new(
        $SLOT_DATE, 1, 1920
    );
    $qp->add_valuerangeprocessor($vrpdate);

    # Parse the query string and return a Xapian::Query object.
    my $query = $qp->parse_query(
        $query_string,
        FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE|FLAG_WILDCARD
    );
    print "Internal view of parsed query is:\n$query\n\n";

    $enquire->set_query($query);
    if (defined $sort_by) {
        $enquire->set_sort_by_value($sort_by, 0);
    }

    # Return the top 10 results for the query.
    my $mset = $enquire->get_mset(0, 10);
    my $msize = $mset->size();
    if ($msize == 0) {
        print "No matching documents found.\n";
        exit 0;
    }

    # Display the results.  The total is flagged as approximate whenever the
    # lower and upper bounds on the match count differ.
    if ($mset->get_matches_lower_bound() != $mset->get_matches_upper_bound()) {
        print "About ";
    }
    printf "%u matching documents were found.\n",
        $mset->get_matches_estimated();
    print "Results 1-$msize:\n";
    foreach my $m ($mset->items()) {
        # Ranks are zero-based, so add one for display.
        printf "#%u: Score %u%%: %s\n",
            $m->get_rank() + 1,
            $m->get_percent(),
            $m->get_document()->get_data();
    }
};
# Whatever the eval block died with is now in $@: report it on STDERR and
# exit with a failure status.
if (my $exception = $@) {
    printf STDERR "Exception: %s\n", $exception;
    exit 1;
}
|