File: copydatabase.cc

package info (click to toggle)
xapian-core 1.4.3-2%2Bdeb9u3
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 21,412 kB
  • sloc: cpp: 113,868; ansic: 8,723; sh: 4,433; perl: 836; makefile: 566; tcl: 317; python: 40
file content (174 lines) | stat: -rw-r--r-- 5,563 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
/** @file copydatabase.cc
 * @brief Perform a document-by-document copy of one or more Xapian databases.
 */
/* Copyright (C) 2006,2007,2008,2009,2010,2011 Olly Betts
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 */

#include <config.h>

#include <xapian.h>

#include <iomanip>
#include <iostream>

#include <cmath> // For log10().
#include <cstdlib> // For exit().
#include <cstring> // For strcmp() and strrchr().

using namespace std;

#define PROG_NAME "copydatabase"
#define PROG_DESC "Perform a document-by-document copy of one or more Xapian databases"

/** Print the command-line usage summary to stdout and terminate.
 *
 *  @param rc	Status passed to exit(); this function does not return.
 */
static void
show_usage(int rc)
{
    // The help text is a single string literal built from adjacent pieces;
    // endl flushes it before exit() is called.
    cout << "Usage: " PROG_NAME " SOURCE_DATABASE... DESTINATION_DATABASE\n\n"
"Options:\n"
"  --no-renumber    Preserve the numbering of document ids (useful if you have\n"
"                   external references to them, or have set them to match\n"
"                   unique ids from an external source).  If multiple source\n"
"                   databases are specified and the same docid occurs in more\n"
"                   than one, the last occurrence will be the one which ends\n"
"                   up in the destination database.\n"
"  --help           display this help and exit\n"
"  --version        output version information and exit" << endl;
    exit(rc);
}

int
main(int argc, char **argv)
try {
    bool renumber = true;
    if (argc > 1 && argv[1][0] == '-') {
	if (strcmp(argv[1], "--help") == 0) {
	    cout << PROG_NAME " - " PROG_DESC "\n\n";
	    show_usage(0);
	}
	if (strcmp(argv[1], "--version") == 0) {
	    cout << PROG_NAME " - " PACKAGE_STRING << endl;
	    exit(0);
	}
	if (strcmp(argv[1], "--no-renumber") == 0) {
	    renumber = false;
	    argv[1] = argv[0];
	    ++argv;
	    --argc;
	}
    }

    // We expect two or more arguments: at least one source database path
    // followed by the destination database path.
    if (argc < 3) show_usage(1);

    // Create the destination database, using DB_CREATE so that we don't
    // try to overwrite or update an existing database in case the user
    // got the command line argument order wrong.
    const char *dest = argv[argc - 1];
    Xapian::WritableDatabase db_out(dest, Xapian::DB_CREATE);

    for (int i = 1; i < argc - 1; ++i) {
	char * src = argv[i];
	if (*src) {
	    // Remove any trailing directory separator.
	    char & ch = src[strlen(src) - 1];
	    if (ch == '/' || ch == '\\') ch = '\0';
	}

	// Open the source database.
	Xapian::Database db_in(src);

	// Find the leaf-name of the database path for reporting progress.
	const char * leaf = strrchr(src, '/');
#if defined __WIN32__ || defined __OS2__
	if (!leaf) leaf = strrchr(src, '\\');
#endif
	if (leaf) ++leaf; else leaf = src;

	// Iterate over all the documents in db_in, copying each to db_out.
	Xapian::doccount dbsize = db_in.get_doccount();
	if (dbsize == 0) {
	    cout << leaf << ": empty!" << endl;
	} else {
	    // Calculate how many decimal digits there are in dbsize.
	    int width = static_cast<int>(log10(double(dbsize))) + 1;

	    Xapian::doccount c = 0;
	    Xapian::PostingIterator it = db_in.postlist_begin(string());
	    while (it != db_in.postlist_end(string())) {
		Xapian::docid did = *it;
		if (renumber) {
		    db_out.add_document(db_in.get_document(did));
		} else {
		    db_out.replace_document(did, db_in.get_document(did));
		}

		// Update for the first 10, and then every 13th document
		// counting back from the end (this means that all the
		// digits "rotate" and the counter ends up on the exact
		// total.
		++c;
		if (c <= 10 || (dbsize - c) % 13 == 0) {
		    cout << '\r' << leaf << ": ";
		    cout << setw(width) << c << '/' << dbsize << flush;
		}

		++it;
	    }

	    cout << endl;
	}

	cout << "Copying spelling data..." << flush;
	Xapian::TermIterator spellword = db_in.spellings_begin();
	while (spellword != db_in.spellings_end()) {
	    db_out.add_spelling(*spellword, spellword.get_termfreq());
	    ++spellword;
	}
	cout << " done." << endl;

	cout << "Copying synonym data..." << flush;
	Xapian::TermIterator synkey = db_in.synonym_keys_begin();
	while (synkey != db_in.synonym_keys_end()) {
	    string key = *synkey;
	    Xapian::TermIterator syn = db_in.synonyms_begin(key);
	    while (syn != db_in.synonyms_end(key)) {
		db_out.add_synonym(key, *syn);
		++syn;
	    }
	    ++synkey;
	}
	cout << " done." << endl;

	cout << "Copying user metadata..." << flush;
	Xapian::TermIterator metakey = db_in.metadata_keys_begin();
	while (metakey != db_in.metadata_keys_end()) {
	    string key = *metakey;
	    db_out.set_metadata(key, db_in.get_metadata(key));
	    ++metakey;
	}
	cout << " done." << endl;
    }

    cout << "Committing..." << flush;
    // Commit explicitly so that any error is reported.
    db_out.commit();
    cout << " done." << endl;
} catch (const Xapian::Error & e) {
    cerr << '\n' << argv[0] << ": " << e.get_description() << endl;
    exit(1);
}