1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217
|
#pragma once
//XML v1.0 subset parser
//revision 0.04
namespace nall { namespace XML {
//values stored in a node's _metadata field:
// 0 = element
// 1 = attribute

//forward declaration so that SharedNode can be named inside the struct's own member functions
struct ManagedNode;
//reference-counted handle to an XML::ManagedNode
using SharedNode = shared_pointer<ManagedNode>;
struct ManagedNode : Markup::ManagedNode {
protected:
inline string escape() const {
string result = _value;
result.replace("&", "&");
result.replace("<", "<");
result.replace(">", ">");
if(_metadata == 1) {
result.replace("\'", "'");
result.replace("\"", """);
}
return result;
}
inline bool isName(char c) const {
if(c >= 'A' && c <= 'Z') return true;
if(c >= 'a' && c <= 'z') return true;
if(c >= '0' && c <= '9') return true;
if(c == '.' || c == '_') return true;
if(c == '?') return true;
return false;
}
inline bool isWhitespace(char c) const {
if(c == ' ' || c == '\t') return true;
if(c == '\r' || c == '\n') return true;
return false;
}
//copy part of string from source document into target string; decode markup while copying
inline void copy(string& target, const char* source, uint length) {
target.reserve(length + 1);
#if defined(NALL_XML_LITERAL)
memory::copy(target.pointer(), source, length);
target[length] = 0;
return;
#endif
char* output = target.get();
while(length) {
if(*source == '&') {
if(!memory::compare(source, "<", 4)) { *output++ = '<'; source += 4; length -= 4; continue; }
if(!memory::compare(source, ">", 4)) { *output++ = '>'; source += 4; length -= 4; continue; }
if(!memory::compare(source, "&", 5)) { *output++ = '&'; source += 5; length -= 5; continue; }
if(!memory::compare(source, "'", 6)) { *output++ = '\''; source += 6; length -= 6; continue; }
if(!memory::compare(source, """, 6)) { *output++ = '\"'; source += 6; length -= 6; continue; }
}
if(_metadata == 0 && source[0] == '<' && source[1] == '!') {
//comment
if(!memory::compare(source, "<!--", 4)) {
source += 4, length -= 4;
while(memory::compare(source, "-->", 3)) source++, length--;
source += 3, length -= 3;
continue;
}
//CDATA
if(!memory::compare(source, "<![CDATA[", 9)) {
source += 9, length -= 9;
while(memory::compare(source, "]]>", 3)) *output++ = *source++, length--;
source += 3, length -= 3;
continue;
}
}
*output++ = *source++, length--;
}
*output = 0;
}
inline bool parseExpression(const char*& p) {
if(*(p + 1) != '!') return false;
//comment
if(!memory::compare(p, "<!--", 4)) {
while(*p && memory::compare(p, "-->", 3)) p++;
if(!*p) throw "unclosed comment";
p += 3;
return true;
}
//CDATA
if(!memory::compare(p, "<![CDATA[", 9)) {
while(*p && memory::compare(p, "]]>", 3)) p++;
if(!*p) throw "unclosed CDATA";
p += 3;
return true;
}
//DOCTYPE
if(!memory::compare(p, "<!DOCTYPE", 9)) {
uint counter = 0;
do {
char n = *p++;
if(!n) throw "unclosed DOCTYPE";
if(n == '<') counter++;
if(n == '>') counter--;
} while(counter);
return true;
}
return false;
}
//returns true if tag closes itself (<tag/>); false if not (<tag>)
inline bool parseHead(const char*& p) {
//parse name
const char* nameStart = ++p; //skip '<'
while(isName(*p)) p++;
const char* nameEnd = p;
copy(_name, nameStart, nameEnd - nameStart);
if(_name.empty()) throw "missing element name";
//parse attributes
while(*p) {
while(isWhitespace(*p)) p++;
if(!*p) throw "unclosed attribute";
if(*p == '?' || *p == '/' || *p == '>') break;
//parse attribute name
SharedNode attribute(new ManagedNode);
attribute->_metadata = 1;
const char* nameStart = p;
while(isName(*p)) p++;
const char* nameEnd = p;
copy(attribute->_name, nameStart, nameEnd - nameStart);
if(attribute->_name.empty()) throw "missing attribute name";
//parse attribute data
if(*p++ != '=') throw "missing attribute value";
char terminal = *p++;
if(terminal != '\'' && terminal != '\"') throw "attribute value not quoted";
const char* dataStart = p;
while(*p && *p != terminal) p++;
if(!*p) throw "missing attribute data terminal";
const char* dataEnd = p++; //skip closing terminal
copy(attribute->_value, dataStart, dataEnd - dataStart);
_children.append(attribute);
}
//parse closure
if(*p == '?' && *(p + 1) == '>') { p += 2; return true; }
if(*p == '/' && *(p + 1) == '>') { p += 2; return true; }
if(*p == '>') { p += 1; return false; }
throw "invalid element tag";
}
//parse element and all of its child elements
inline void parseElement(const char*& p) {
SharedNode node(new ManagedNode);
if(node->parseHead(p) == false) node->parse(p);
_children.append(node);
}
//return true if </tag> matches this node's name
inline bool parseClosureElement(const char*& p) {
if(p[0] != '<' || p[1] != '/') return false;
p += 2;
const char* nameStart = p;
while(*p && *p != '>') p++;
if(*p != '>') throw "unclosed closure element";
const char* nameEnd = p++;
if(memory::compare(_name.data(), nameStart, nameEnd - nameStart)) throw "closure element name mismatch";
return true;
}
//parse contents of an element
inline void parse(const char*& p) {
const char* dataStart = p;
const char* dataEnd = p;
while(*p) {
while(*p && *p != '<') p++;
if(!*p) break;
dataEnd = p;
if(parseClosureElement(p) == true) break;
if(parseExpression(p) == true) continue;
parseElement(p);
}
copy(_value, dataStart, dataEnd - dataStart);
}
friend auto unserialize(const string&) -> Markup::SharedNode;
};
//builds a node tree from an XML document
//  markup: the document text
//  returns the root node; a null node if the parser reported an error
inline auto unserialize(const string& markup) -> Markup::SharedNode {
  ManagedNode* root = new ManagedNode;
  try {
    const char* cursor = markup;
    root->parse(cursor);
  } catch(const char*) {
    //the parser signals malformed input by throwing a message; discard the partial tree
    delete root;
    root = nullptr;
  }
  return root;
}
}}
|