File: xml.hpp

package info (click to toggle)
higan 098-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 11,904 kB
  • ctags: 13,286
  • sloc: cpp: 108,285; ansic: 778; makefile: 32; sh: 18
file content (217 lines) | stat: -rw-r--r-- 6,245 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
#pragma once

//XML v1.0 subset parser
//revision 0.04

namespace nall { namespace XML {

//metadata:
//  0 = element
//  1 = attribute

struct ManagedNode;
using SharedNode = shared_pointer<ManagedNode>;

struct ManagedNode : Markup::ManagedNode {
protected:
  inline string escape() const {
    string result = _value;
    result.replace("&", "&amp;");
    result.replace("<", "&lt;");
    result.replace(">", "&gt;");
    if(_metadata == 1) {
      result.replace("\'", "&apos;");
      result.replace("\"", "&quot;");
    }
    return result;
  }

  inline bool isName(char c) const {
    if(c >= 'A' && c <= 'Z') return true;
    if(c >= 'a' && c <= 'z') return true;
    if(c >= '0' && c <= '9') return true;
    if(c == '.' || c == '_') return true;
    if(c == '?') return true;
    return false;
  }

  inline bool isWhitespace(char c) const {
    if(c ==  ' ' || c == '\t') return true;
    if(c == '\r' || c == '\n') return true;
    return false;
  }

  //copy part of string from source document into target string; decode markup while copying
  inline void copy(string& target, const char* source, uint length) {
    target.reserve(length + 1);

    #if defined(NALL_XML_LITERAL)
    memory::copy(target.pointer(), source, length);
    target[length] = 0;
    return;
    #endif

    char* output = target.get();
    while(length) {
      if(*source == '&') {
        if(!memory::compare(source, "&lt;",   4)) { *output++ = '<';  source += 4; length -= 4; continue; }
        if(!memory::compare(source, "&gt;",   4)) { *output++ = '>';  source += 4; length -= 4; continue; }
        if(!memory::compare(source, "&amp;",  5)) { *output++ = '&';  source += 5; length -= 5; continue; }
        if(!memory::compare(source, "&apos;", 6)) { *output++ = '\''; source += 6; length -= 6; continue; }
        if(!memory::compare(source, "&quot;", 6)) { *output++ = '\"'; source += 6; length -= 6; continue; }
      }

      if(_metadata == 0 && source[0] == '<' && source[1] == '!') {
        //comment
        if(!memory::compare(source, "<!--", 4)) {
          source += 4, length -= 4;
          while(memory::compare(source, "-->", 3)) source++, length--;
          source += 3, length -= 3;
          continue;
        }

        //CDATA
        if(!memory::compare(source, "<![CDATA[", 9)) {
          source += 9, length -= 9;
          while(memory::compare(source, "]]>", 3)) *output++ = *source++, length--;
          source += 3, length -= 3;
          continue;
        }
      }

      *output++ = *source++, length--;
    }
    *output = 0;
  }

  inline bool parseExpression(const char*& p) {
    if(*(p + 1) != '!') return false;

    //comment
    if(!memory::compare(p, "<!--", 4)) {
      while(*p && memory::compare(p, "-->", 3)) p++;
      if(!*p) throw "unclosed comment";
      p += 3;
      return true;
    }

    //CDATA
    if(!memory::compare(p, "<![CDATA[", 9)) {
      while(*p && memory::compare(p, "]]>", 3)) p++;
      if(!*p) throw "unclosed CDATA";
      p += 3;
      return true;
    }

    //DOCTYPE
    if(!memory::compare(p, "<!DOCTYPE", 9)) {
      uint counter = 0;
      do {
        char n = *p++;
        if(!n) throw "unclosed DOCTYPE";
        if(n == '<') counter++;
        if(n == '>') counter--;
      } while(counter);
      return true;
    }

    return false;
  }

  //returns true if tag closes itself (<tag/>); false if not (<tag>)
  inline bool parseHead(const char*& p) {
    //parse name
    const char* nameStart = ++p;  //skip '<'
    while(isName(*p)) p++;
    const char* nameEnd = p;
    copy(_name, nameStart, nameEnd - nameStart);
    if(_name.empty()) throw "missing element name";

    //parse attributes
    while(*p) {
      while(isWhitespace(*p)) p++;
      if(!*p) throw "unclosed attribute";
      if(*p == '?' || *p == '/' || *p == '>') break;

      //parse attribute name
      SharedNode attribute(new ManagedNode);
      attribute->_metadata = 1;

      const char* nameStart = p;
      while(isName(*p)) p++;
      const char* nameEnd = p;
      copy(attribute->_name, nameStart, nameEnd - nameStart);
      if(attribute->_name.empty()) throw "missing attribute name";

      //parse attribute data
      if(*p++ != '=') throw "missing attribute value";
      char terminal = *p++;
      if(terminal != '\'' && terminal != '\"') throw "attribute value not quoted";
      const char* dataStart = p;
      while(*p && *p != terminal) p++;
      if(!*p) throw "missing attribute data terminal";
      const char* dataEnd = p++;  //skip closing terminal

      copy(attribute->_value, dataStart, dataEnd - dataStart);
      _children.append(attribute);
    }

    //parse closure
    if(*p == '?' && *(p + 1) == '>') { p += 2; return true; }
    if(*p == '/' && *(p + 1) == '>') { p += 2; return true; }
    if(*p == '>') { p += 1; return false; }
    throw "invalid element tag";
  }

  //parse element and all of its child elements
  inline void parseElement(const char*& p) {
    SharedNode node(new ManagedNode);
    if(node->parseHead(p) == false) node->parse(p);
    _children.append(node);
  }

  //return true if </tag> matches this node's name
  inline bool parseClosureElement(const char*& p) {
    if(p[0] != '<' || p[1] != '/') return false;
    p += 2;
    const char* nameStart = p;
    while(*p && *p != '>') p++;
    if(*p != '>') throw "unclosed closure element";
    const char* nameEnd = p++;
    if(memory::compare(_name.data(), nameStart, nameEnd - nameStart)) throw "closure element name mismatch";
    return true;
  }

  //parse contents of an element
  inline void parse(const char*& p) {
    const char* dataStart = p;
    const char* dataEnd = p;

    while(*p) {
      while(*p && *p != '<') p++;
      if(!*p) break;
      dataEnd = p;
      if(parseClosureElement(p) == true) break;
      if(parseExpression(p) == true) continue;
      parseElement(p);
    }

    copy(_value, dataStart, dataEnd - dataStart);
  }

  friend auto unserialize(const string&) -> Markup::SharedNode;
};

//parses an XML document into a node tree
//returns the root node, or a null node if the parser threw a const char* error message
inline auto unserialize(const string& markup) -> Markup::SharedNode {
  ManagedNode* root = new ManagedNode;
  try {
    const char* cursor = markup;
    root->parse(cursor);
  } catch(const char*) {
    //the message is not reported; the partially built tree is discarded
    delete root;
    root = nullptr;
  }
  return root;
}

}}