1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
|
<?php
/* PHP5 script to index each paragraph of a text file as a Xapian document.
*
* Copyright (C) 2007,2009 Olly Betts
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
// This example only makes sense when run from the command line, so refuse
// to continue under any other SAPI (e.g. a web server module).
$sapi_name = php_sapi_name();
if ($sapi_name !== "cli") {
    echo "This example script is written to run under the command line ('cli') version of\n";
    echo "the PHP interpreter, but you're using the '" . $sapi_name . "' version\n";
    exit(1);
}
include "php5/xapian.php";
// Exactly one command-line argument (the database path) is required in
// addition to the script name itself.
if ($argc !== 2) {
    echo "Usage: " . $argv[0] . " PATH_TO_DATABASE\n";
    exit(1);
}
// Read text from standard input and add each paragraph (a run of non-blank
// lines, joined with single spaces) to the database as its own document.
// Any exception is reported by printing its message and exiting with
// status 1.
try {
    // Open the database for update, creating a new database if necessary.
    $database = new XapianWritableDatabase($argv[1], Xapian::DB_CREATE_OR_OPEN);

    // The term generator indexes the text of each paragraph, using an
    // English stemmer.
    $indexer = new XapianTermGenerator();
    $stemmer = new XapianStem("english");
    $indexer->set_stemmer($stemmer);

    // Read the whole of standard input, one array element per line.
    $lines = file("php://stdin");
    if ($lines === false) {
        // file() returns false on failure; report that via the catch block
        // below rather than iterating over a non-array.
        throw new Exception("Failed to read from standard input");
    }

    // Append a sentinel blank line so that a final paragraph which is not
    // followed by a blank line in the input is still indexed instead of
    // being silently dropped.
    $lines[] = "";

    $para = '';
    foreach ($lines as $line) {
        // Strip the line ending (and any other trailing whitespace), so a
        // whitespace-only line counts as a paragraph separator.
        $line = rtrim($line);
        if ($line == "" && $para != "") {
            // We've reached the end of a paragraph, so index it.
            $doc = new XapianDocument();
            $doc->set_data($para);

            $indexer->set_document($doc);
            $indexer->index_text($para);

            // Add the document to the database.
            $database->add_document($doc);

            $para = "";
        } else {
            // Accumulate this line into the current paragraph, separating
            // lines with a single space.  (A blank line while no paragraph
            // is in progress ends up here too and appends nothing.)
            if ($para != "") {
                $para .= " ";
            }
            $para .= $line;
        }
    }

    // Set the database handle to Null to ensure that it gets closed
    // down cleanly or uncommitted changes may be lost.
    $database = Null;
} catch (Exception $e) {
    print $e->getMessage() . "\n";
    exit(1);
}
?>
|