1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
|
#ifndef READXL_CELLTYPE_
#define READXL_CELLTYPE_
#include <Rcpp.h>
#include <libxls/xls.h>
// Classification of a spreadsheet cell / output column.
// The enumerators keep their implicit values (0..3), spelled out explicitly.
enum CellType {
  CELL_BLANK   = 0,  // no value; colDataframe() drops columns of this type
  CELL_DATE    = 1,  // numeric value carrying a date/time number format
  CELL_NUMERIC = 2,  // plain number
  CELL_TEXT    = 3   // string
};
// Forward declaration: cellType() calls isDateTime() before its definition
// appears further down in this header.
bool inline isDateTime(int id, const std::set<int> custom);
// Translate a character vector of type names into CellType values.
//
// Recognised names are "blank", "date", "numeric" and "text". Any other
// value raises an R warning and is treated as text, so the returned vector
// always has exactly one entry per element of `x` (positions stay aligned
// with the columns they describe).
//
// @param x  Type names, one per column.
// @return   The corresponding CellType for every element of `x`.
inline std::vector<CellType> cellTypes(Rcpp::CharacterVector x) {
  std::vector<CellType> types;
  types.reserve(x.size());

  for (int i = 0; i < x.size(); ++i) {
    std::string type(x[i]);

    if (type == "blank") {
      types.push_back(CELL_BLANK);
    } else if (type == "date") {
      types.push_back(CELL_DATE);
    } else if (type == "numeric") {
      types.push_back(CELL_NUMERIC);
    } else if (type == "text") {
      types.push_back(CELL_TEXT);
    } else {
      // Positions in the message are 1-based to match R indexing.
      Rcpp::warning("Unknown type '%s' at position %i. Using text instead.",
                    type, i + 1);
      // Actually apply the fallback the warning promises; without this the
      // result would be shorter than `x` and later entries would shift.
      types.push_back(CELL_TEXT);
    }
  }

  return types;
}
// Human-readable name of a CellType ("blank", "date", "numeric", "text").
// Values outside the enumeration yield "???".
inline std::string cellTypeDesc(CellType type) {
  const char* label = "???";

  if (type == CELL_BLANK) {
    label = "blank";
  } else if (type == CELL_DATE) {
    label = "date";
  } else if (type == CELL_NUMERIC) {
    label = "numeric";
  } else if (type == CELL_TEXT) {
    label = "text";
  }

  return label;
}
// Classify a single libxls cell record.
//
// Record ids are listed in [MS-XLS] S2.3.2 (p175); see xls_addCell for the
// ones libxls uses for cells.
//
// @param cell               The cell record to classify.
// @param styles             Workbook XF table, or NULL; when NULL, numeric
//                           records are always reported as CELL_NUMERIC.
// @param customDateFormats  Ids of user-defined formats that are dates.
// @param na                 String that marks a text cell as missing.
// @return The detected CellType. Unrecognised record ids are reported on
//         Rcpp::Rcout and classified as CELL_NUMERIC.
inline CellType cellType(xls::st_cell::st_cell_data cell, xls::st_xf* styles,
                         const std::set<int>& customDateFormats,
                         std::string na = "") {
  switch (cell.id) {
  case 253:   // LabelSst
  case 516: { // Label
    const bool matchesNa = (na.compare((char*) cell.str) == 0);
    return matchesNa ? CELL_BLANK : CELL_TEXT;
  }

  case 6:     // formula
  case 1030:  // formula (Apple Numbers Bug)
    // A formula whose `l` field is zero is treated as numeric; otherwise
    // its string value is compared against the NA marker.
    if (cell.l == 0)
      return CELL_NUMERIC;
    if (na.compare((char*) cell.str) == 0)
      return CELL_BLANK;
    return CELL_TEXT;

  case 189:   // MulRk
  case 515:   // Number
  case 638: { // Rk
    if (styles == NULL)
      return CELL_NUMERIC;

    // A number is a date only if its number format says so.
    const int format = styles->xf[cell.xf].format;
    if (isDateTime(format, customDateFormats))
      return CELL_DATE;
    return CELL_NUMERIC;
  }

  case 190:   // MulBlank
  case 513:   // Blank
    return CELL_BLANK;

  default:
    break;
  }

  Rcpp::Rcout << "Unknown type: " << cell.id << "\n";
  return CELL_NUMERIC;
}
// Decide whether number-format id `id` denotes a date/time.
//
// Built-in date formats come from ECMA-376
// (http://www.ecma-international.org/publications/standards/Ecma-376.htm),
// 18.8.30 numFmt (Number Format) (p1777):
//   14-22, 27-36, 45-47, 50-58, 71-81 (inclusive).
// Every other id below 164 is a built-in non-date format; ids from 164
// upwards are dates only when listed in `custom`.
//
// NOTE(review): `custom` is taken by value, so the set is copied on each
// call; switching to a reference would also require updating the forward
// declaration earlier in this header.
bool inline isDateTime(int id, const std::set<int> custom) {
  static const int builtinDateRanges[][2] = {
    {14, 22}, {27, 36}, {45, 47}, {50, 58}, {71, 81}
  };
  const int nRanges =
    sizeof(builtinDateRanges) / sizeof(builtinDateRanges[0]);

  for (int r = 0; r < nRanges; ++r) {
    if (builtinDateRanges[r][0] <= id && id <= builtinDateRanges[r][1])
      return true;
  }

  // Only user-defined formats (>= 164) can be custom dates.
  if (id >= 164)
    return custom.find(id) != custom.end();

  return false;
}
// True when the characters in [begin, end) of `x` are one of the letters
// h, m or s (either case) repeated one or more times, e.g. "h", "mm", "SS".
inline bool isElapsedTimeCode(const std::string& x, size_t begin, size_t end) {
  if (begin >= end)
    return false;

  char unit = x[begin];
  if (unit >= 'A' && unit <= 'Z')
    unit = static_cast<char>(unit - 'A' + 'a');
  if (unit != 'h' && unit != 'm' && unit != 's')
    return false;

  for (size_t k = begin + 1; k < end; ++k) {
    char c = x[k];
    if (c >= 'A' && c <= 'Z')
      c = static_cast<char>(c - 'A' + 'a');
    if (c != unit)
      return false;
  }
  return true;
}

// Decide whether a number-format code string describes a date/time.
//
// A format is a date/time format when it contains one of the date/time
// placeholders d, m, y, h or s (either case) as an actual format code.
// Characters that are only literal text are ignored:
//   - text inside double quotes ("..."),
//   - the character following a backslash (escape), an underscore (spacing)
//     or an asterisk (fill),
//   - bracketed sections such as [Red], [>100] or [$-409], unless the
//     section is an elapsed-time code like [h], [mm] or [ss].
//
// @param x  Format code, e.g. "yyyy-mm-dd" or "[Red]0.00".
// @return   true if the code formats a date/time, false otherwise
//           (including for an empty string or an unterminated quote/bracket
//           with no date code before it).
inline bool isDateFormat(std::string x) {
  const size_t n = x.size();
  size_t i = 0;

  while (i < n) {
    switch (x[i]) {
    case '"': {
      // Quoted literal: resume after the closing quote.
      const size_t close = x.find('"', i + 1);
      if (close == std::string::npos)
        return false;
      i = close + 1;
      break;
    }

    case '\\': // escaped literal character
    case '_':  // space as wide as the next character
    case '*':  // repeat the next character to fill the cell
      i += 2;
      break;

    case '[': {
      // Colours, conditions and locale tags are not date codes, but
      // elapsed-time sections ([h], [mm], [ss]) are.
      const size_t close = x.find(']', i + 1);
      if (close == std::string::npos)
        return false;
      if (isElapsedTimeCode(x, i + 1, close))
        return true;
      i = close + 1;
      break;
    }

    case 'd': case 'D':
    case 'm': case 'M': // months, or 'mm' for minutes
    case 'y': case 'Y':
    case 'h': case 'H': // 'hh'
    case 's': case 'S': // 'ss'
      return true;

    default:
      ++i;
      break;
    }
  }

  return false;
}
// Allocate an output column of length `n` for the given cell type, filled
// with missing values.
//
//   CELL_DATE    -> numeric vector of NA_REAL with class POSIXct/POSIXt
//                   and tzone "UTC"
//   CELL_NUMERIC -> numeric vector of NA_REAL
//   CELL_TEXT    -> character vector of NA_STRING
//   CELL_BLANK   -> R_NilValue (also returned for any other value)
inline Rcpp::RObject makeCol(CellType type, int n) {
  if (type == CELL_DATE) {
    Rcpp::RObject col = Rcpp::NumericVector(n, NA_REAL);
    col.attr("class") = Rcpp::CharacterVector::create("POSIXct", "POSIXt");
    col.attr("tzone") = "UTC";
    return col;
  }

  if (type == CELL_NUMERIC)
    return Rcpp::NumericVector(n, NA_REAL);

  if (type == CELL_TEXT)
    return Rcpp::CharacterVector(n, NA_STRING);

  return R_NilValue;
}
// Make data frame from list of columns, dropping blanks
// Assemble a data frame from a list of columns, leaving out every column
// whose type is CELL_BLANK.
//
// @param cols   Columns, one list element each.
// @param names  Column names, parallel to `cols`.
// @param types  Column types, parallel to `cols`.
// @return A list with class c("tbl_df", "tbl", "data.frame"), compact
//         row.names and the names of the retained columns. The row count
//         is the length of the first retained column (0 if none remain).
inline Rcpp::List colDataframe(Rcpp::List cols, Rcpp::CharacterVector names,
                               std::vector<CellType> types) {
  const int nCols = cols.size();

  // Indices of the columns that survive the blank filter, in order.
  std::vector<int> keep;
  keep.reserve(nCols);
  for (int j = 0; j < nCols; ++j) {
    if (types[j] != CELL_BLANK)
      keep.push_back(j);
  }

  const int nKept = static_cast<int>(keep.size());
  Rcpp::List out(nKept);
  Rcpp::CharacterVector names_out(nKept);
  for (int k = 0; k < nKept; ++k) {
    const int src = keep[k];
    out[k] = cols[src];
    names_out[k] = names[src];
  }

  // Turn list into a data frame
  int n = 0;
  if (nKept != 0)
    n = Rf_length(out[0]);

  out.attr("class") = Rcpp::CharacterVector::create("tbl_df", "tbl", "data.frame");
  // c(NA_integer_, -n) is the compact row.names form for n rows.
  out.attr("row.names") = Rcpp::IntegerVector::create(NA_INTEGER, -n);
  out.attr("names") = names_out;
  return out;
}
#endif
|