File: xlsxparse.cc

package info (click to toggle)
xapian-omega 1.2.12-1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 2,968 kB
  • sloc: sh: 10,942; cpp: 10,142; makefile: 283; perl: 160
file content (86 lines) | stat: -rw-r--r-- 2,370 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
/** @file xlsxparse.cc
 * @brief Extract fields from XLSX sheet*.xml.
 */
/* Copyright (C) 2012 Olly Betts
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 */

#include <config.h>

#include "xlsxparse.h"

#include <algorithm>
#include <cstdlib>

using namespace std;

/** Handle an opening XML tag, updating the parser's mode.
 *
 *  Tags handled:
 *   - <c>:   sets mode to MODE_C_STRING when the "t" attribute is "s",
 *            otherwise to MODE_C_LITERAL.
 *   - <v>:   promotes MODE_C_LITERAL to MODE_V_LITERAL and MODE_C_STRING to
 *            MODE_V_STRING; any other mode is left unchanged.
 *   - <si>:  sets mode to MODE_SI.
 *   - <sst>: uses the "uniquecount" attribute (if present) as a capped
 *            capacity hint for the shared string table.
 *
 *  All other tags are ignored.
 *
 *  @param tag	The name of the tag being opened.
 *
 *  @return Always true.  NOTE(review): presumably this tells the calling
 *	    parser to carry on - confirm against the base class.
 */
bool
XlsxParser::opening_tag(const string &tag)
{
    if (tag == "c") {
	// We need to distinguish <v> tags which are inside <c t="s">, as these
	// are numeric references to shared strings.
	string type;
	if (get_parameter("t", type) && type == "s") {
	    mode = MODE_C_STRING;
	} else {
	    mode = MODE_C_LITERAL;
	}
    } else if (tag == "v") {
	if (mode == MODE_C_LITERAL) {
	    mode = MODE_V_LITERAL;
	} else if (mode == MODE_C_STRING) {
	    mode = MODE_V_STRING;
	}
    } else if (tag == "si") {
	mode = MODE_SI;
    } else if (tag == "sst") {
	// NOTE(review): the attribute is looked up in lower case; presumably
	// the underlying parser lower-cases attribute names - confirm.
	string unique_count;
	if (get_parameter("uniquecount", unique_count)) {
	    // strtoul() yields 0 for non-numeric input (reserve(0) is a no-op)
	    // and ULONG_MAX on overflow (which the cap below handles).
	    unsigned long c = strtoul(unique_count.c_str(), NULL, 10);
	    // This reserving is just a performance tweak, so don't go reserving
	    // ludicrous amounts of space just because an XML attribute told us
	    // to - cap the hint rather than using it as a lower bound.
	    sst.reserve(std::min(c, 1000000ul));
	}
    }
    return true;
}

/** Handle a run of character data according to the current mode.
 *
 *  - MODE_V_STRING:  @a text is parsed as a decimal index into the shared
 *		      string table; the referenced string is appended as a
 *		      field if the index is in range, otherwise nothing is
 *		      appended.
 *  - MODE_V_LITERAL: @a text itself is appended as a field.
 *  - MODE_SI:	      @a text is added to the end of the shared string table.
 *
 *  In each of those cases the mode is reset to MODE_NONE afterwards.  In any
 *  other mode the text is ignored and the mode is left untouched.
 *
 *  @param text	The character data encountered.
 */
void
XlsxParser::process_text(const string &text)
{
    if (mode == MODE_V_STRING) {
	// Reference to a shared string: look it up by index.
	const unsigned long index = strtoul(text.c_str(), NULL, 10);
	if (index < sst.size()) {
	    append_field(sst[index]);
	}
    } else if (mode == MODE_V_LITERAL) {
	// The cell holds its (possibly calculated) value directly.
	append_field(text);
    } else if (mode == MODE_SI) {
	// A new entry for the shared string table.
	sst.push_back(text);
    } else {
	// Text we have no interest in - leave the mode alone.
	return;
    }

    // The text for this element has been consumed.
    mode = MODE_NONE;
}