File: xmldocument.h

package info (click to toggle)
signalbackup-tools 20250313.1-1
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 3,752 kB
  • sloc: cpp: 47,042; sh: 477; ansic: 399; ruby: 19; makefile: 3
file content (211 lines) | stat: -rw-r--r-- 5,798 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
/*
  Copyright (C) 2024-2025  Selwin van Dijk

  This file is part of signalbackup-tools.

  signalbackup-tools is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  signalbackup-tools is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with signalbackup-tools.  If not, see <https://www.gnu.org/licenses/>.
*/

#ifndef XMLDOCUMENT_H_
#define XMLDOCUMENT_H_

#include <vector>
#include <map>
#include <string>

#include "../logger/logger.h"
#include "../common_be.h"

// In-memory representation of an XML document: an optional prolog string
// plus a tree of Nodes hanging off a single root node. The document is
// constructed from a filename (the constructor is defined outside this
// header); ok() returns the d_ok flag.
class XmlDocument
{
 public:
  // One element (or text node) of the document tree. Holds its name, its
  // attributes and its child nodes; iterating a Node (begin()/end())
  // walks its direct children.
  class Node
  {
   public:
    // An attribute value that is either held in memory or referenced in a file.
    // When pos == -1 the data is in `value`; otherwise the accessors in this
    // header treat it as `size` bytes starting at offset `pos` in `file`.
    struct StringOrRef
    {
      std::string value; // for now
      std::string file;
      long long int pos = -1;
      long long int size = -1;
    };

   private:
    // Not used anywhere in this header.
    // NOTE(review): presumably the size limit above which a value is stored
    // as a file reference instead of in memory -- confirm in the parser.
    static int constexpr s_maxsize = 1024;

    // Parent node as passed to the constructor (nullptr by default).
    Node *d_parent;
    // Direct children; exposed read-only through begin()/end().
    std::vector<Node> d_children;
    // Attributes, keyed by attribute name.
    std::map<std::string, StringOrRef> d_attributes;
    // Tag name, as printed between '<' and the attributes.
    std::string d_name;
    std::string d_value; // make this a separate thing, so it can refer to file and position/size if size is too big
    //bool d_value_contains_ampersand; // just a helper if we have read the value during parsing anyway...
    // Initialized to false; not read anywhere in this header.
    // NOTE(review): presumably set once the closing tag was parsed -- confirm.
    bool is_closed;
    // True when this node is a text node: print() then only outputs the
    // quoted d_value instead of a tag.
    bool is_text_node;
   public:
    // Creates an empty, non-text, not-closed node with the given parent.
    inline explicit Node(Node *parent = nullptr);
    // Logs this node and its children via Logger, indenting nested elements.
    void print(int indent = 0) const;

    inline bool isTextNode() const;
    inline std::string const &name() const;
    // True if an attribute with this name is present.
    inline bool hasAttribute(std::string const &name) const;
    // Attribute value as a string (empty string if the attribute is absent).
    inline std::string getAttribute(std::string const &name) const;
    // Attribute as stored (default-constructed StringOrRef if absent).
    inline StringOrRef getAttributeStringOrRef(std::string const &name) const;

    // Iteration over the direct children.
    inline auto begin() const;
    inline auto end() const;

    // XmlDocument accesses the private members directly.
    friend class XmlDocument;
  };

 private:
  // Parser states. Not referenced in this header.
  // NOTE(review): presumably consumed by the constructor's parsing loop -- confirm.
  enum State
  {
    INITIAL,
    FINISHED,
    PROLOG_ELEMENT_DTD_COMMENT,
    DTD_COMMENT,
    COMMENT,
    COMMENT_FIRST_OPEN_HYPHEN,
    COMMENT_FIRST_CLOSE_HYPHEN,
    COMMENT_SECOND_CLOSE_HYPHEN,
    //ATTRIBUTE_ELEMENT_END,
    ELEMENT_SELFCLOSING_END,
    ELEMENT_AFTER_TAGNAME,
    ATTRIBUTE_NAME,  // [a-zA-Z_]{1}[^ <>&]* (inside ELEMENT_TAG)
    ATTRIBUTE_WAIT_QUOTE,
    ATTRIBUTE_VALUE_SINGLE, // ['] (ends with ['], whichever opened it), after ATTRIBUTE_NAME and '='
    ATTRIBUTE_VALUE_DOUBLE, // ["] (ends with ["], whichever opened it), after ATTRIBUTE_NAME and '='
    ELEMENT_TAG,     // [a-zA-Z_]{1}[a-zA-Z_-.]*  , also not starting with (icase)"xml"
    ELEMENT_CLOSING_TAG_START,
    ELEMENT_CLOSING_TAG,
    ELEMENT_VALUE,
    PROLOG,          // <? (ends with ?>)
    PROLOG_READ_QUESTIONMARK,
    DTD,             // <!DOCTYPE
    DTD_LIST,        // [ (inside DTD)
    //DTD_ELEMENT, // <!ELEMENT (inside DTD_LIST)
    //DTD_ENTITY, //  <!ENTITY (inside DTD_LIST)
  };

  // Root of the node tree; returned by root() and printed by print().
  Node d_rootnode;
  // Not used in this header.
  // NOTE(review): presumably the node currently being filled while parsing -- confirm.
  Node *d_currentnode;
  // Prolog text; print() outputs it between "<?" and "?>" when non-empty.
  std::string d_prolog;
  // Status flag returned by ok().
  bool d_ok;

 public:
  // Builds the document from `filename` (defined outside this header).
  explicit XmlDocument(std::string const &filename);
  inline bool ok() const;
  // Logs the prolog (if any) followed by the whole tree.
  inline void print() const;
  inline Node const &root() const;
};

inline XmlDocument::Node::Node(Node *parent)
  :
  d_parent(parent),
  is_closed(false),
  is_text_node(false)
{}

inline void XmlDocument::Node::print(int indent) const
{
  if (!is_text_node)
  {
    Logger::message_start(std::string(indent, ' '), "<", d_name);
    for (auto const &[key, value] : d_attributes)
    {
      if (value.pos == -1) [[likely]]
        Logger::message_continue(" ", key, "=\"", value.value, "\""); // note, we should maybe scan for " and use ' if found
      else
        Logger::message_continue(" ", key, "=\"", "[", value.size, " bytes]", "\"");
    }
    Logger::message_continue((d_children.empty() && d_value.empty()) ? " />" : ">");
    if (d_value.empty())
      Logger::message_end();
    for (auto const &n : d_children)
      n.print(indent + 2);
    if (!d_children.empty() || !d_value.empty())
      Logger::message((d_value.empty() ? std::string(indent, ' ') : ""), "</", d_name, ">");
  }
  else
    Logger::message("'", d_value, "'");
}

// Returns the is_text_node flag: true when this node is a text node rather
// than an element (print() then outputs only the quoted value).
inline bool XmlDocument::Node::isTextNode() const
{
  return is_text_node;
}

// Returns the node's tag name. The reference points into this Node, so it
// is only valid for as long as the node itself is.
inline std::string const &XmlDocument::Node::name() const
{
  return d_name;
}

// Reports whether this node has an attribute called `name`.
// Delegates to bepaald::contains() on the attribute map.
// NOTE(review): presumably a plain key lookup -- the helper is defined in
// common_be.h, not in this header.
inline bool XmlDocument::Node::hasAttribute(std::string const &name) const
{
  return bepaald::contains(d_attributes, name);
}

inline std::string XmlDocument::Node::getAttribute(std::string const &name) const
{
  if (auto it = d_attributes.find(name); it != d_attributes.end()) [[likely]]
  {
    if (it->second.pos == -1)
      return it->second.value;
    else
    {
      std::ifstream tmp(it->second.file, std::ios_base::in | std::ios_base::binary);
      tmp.seekg(it->second.pos);
      std::unique_ptr<char[]> v(new char[it->second.size]);
      tmp.read(v.get(), it->second.size);
      return std::string(tmp.get(), it->second.size);
    }
  }
  return std::string();
}

// Returns a copy of the stored StringOrRef for attribute `name`, without
// reading any referenced file data. If the attribute does not exist, a
// default-constructed StringOrRef (empty strings, pos and size -1) is
// returned.
inline XmlDocument::Node::StringOrRef XmlDocument::Node::getAttributeStringOrRef(std::string const &name) const
{
  auto const found = d_attributes.find(name);
  if (found == d_attributes.end()) [[unlikely]]
    return StringOrRef{};
  return found->second;
}

// Const iterator to the first child node, so that a Node can be used
// directly in a range-based for loop over its children.
inline auto XmlDocument::Node::begin() const
{
  return d_children.cbegin();
}

// Const past-the-end iterator of the child nodes (counterpart of begin()).
inline auto XmlDocument::Node::end() const
{
  return d_children.cend();
}

// Returns the document's status flag (d_ok).
// NOTE(review): presumably set by the constructor to indicate whether the
// file was parsed successfully -- the constructor is not in this header.
inline bool XmlDocument::ok() const
{
  return d_ok;
}

inline void XmlDocument::print() const
{
  if (!d_prolog.empty())
    Logger::message("<?", d_prolog, "?>");
  d_rootnode.print();
}

// Returns read-only access to the root node of the tree. The reference
// points into this XmlDocument, so it is only valid while the document is.
inline XmlDocument::Node const &XmlDocument::root() const
{
  return d_rootnode;
}

#endif