File: document.hpp

package info (click to toggle)
libzeep 5.1.8-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 3,596 kB
  • sloc: cpp: 27,393; xml: 7,798; javascript: 180; sh: 37; makefile: 8
file content (294 lines) | stat: -rw-r--r-- 10,895 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
// Copyright Maarten L. Hekkelman, Radboud University 2008-2013.
//        Copyright Maarten L. Hekkelman, 2014-2022
//  Distributed under the Boost Software License, Version 1.0.
//     (See accompanying file LICENSE_1_0.txt or copy at
//           http://www.boost.org/LICENSE_1_0.txt)

#pragma once

/// \file
/// definition of the zeep::xml::document class

#include <zeep/config.hpp>

#include <utility>

#include <zeep/xml/character-classification.hpp>
#include <zeep/xml/parser.hpp>
#include <zeep/xml/serialize.hpp>

namespace zeep::xml
{

/// zeep::xml::document is the class that contains a parsed XML file.
/// You can create an empty document and add nodes to it, or you can
/// create it by specifying a string containing XML or an std::istream
/// to parse.
///
/// If you use an std::fstream to read a file, be sure to open the file
/// ios::binary. Otherwise, the detection of text encoding might go wrong
/// or the content can become corrupted.
///
/// Default is to parse CDATA sections into zeep::xml::text nodes. If you
/// want to preserve CDATA sections in the DOM tree, you have to call
/// set_preserve_cdata before reading the file.
///
/// By default a document is not validated. But you can turn on validation
/// by using the appropriate constructor or read method, or by calling
/// set_validating explicitly. The DTDs will be loaded from the base dir
/// specified, but you can change this by installing an external entity
/// loader with set_entity_loader.
///
/// A document has one zeep::xml::root_node element. This root element
/// can have only one zeep::xml::element child node.

/// \brief The three parts of a DOCTYPE declaration as kept by zeep::xml::document.
///
/// This is a plain aggregate; the member order (root, pubid, dtd) is the
/// order used when it is brace-initialised.
struct doc_type
{
	/// Name of the root element given in the DOCTYPE
	std::string m_root;

	/// Public identifier, empty for a SYSTEM DOCTYPE
	std::string m_pubid;

	/// The DTD part of the DOCTYPE
	/// NOTE(review): presumably the system identifier / URI of the DTD — confirm against the parser
	std::string m_dtd;
};

class document : public element
{
  public:

	/// \brief Constructor for an empty document.
	document();

	/// \brief Copy constructor
	document(const document& doc);

	/// \brief Move constructor
	document(document&& other);

	/// \brief Copy operator=
	document& operator=(const document& doc);

	/// \brief Move operator=
	document& operator=(document&& other);

	/// \brief Constructor that will parse the XML passed in argument using default settings \a s
	document(const std::string& s);

	/// \brief Constructor that will parse the XML passed in argument using default settings \a is
	document(std::istream& is);

	/// \brief Constructor that will parse the XML passed in argument \a is. This
	/// constructor will also validate the input using DTD's found in \a base_dir
	document(std::istream& is, const std::string& base_dir);

	virtual ~document();

	/// options for parsing
	/// validating uses a DTD if it is defined
	bool is_validating() const								{ return m_validating; }
	void set_validating(bool validate)						{ m_validating = validate; }

	/// validating_ns: when validating take the NS 1.0 specification into account
	bool is_validating_ns() const							{ return m_validating_ns; }
	void set_validating_ns(bool validate)					{ m_validating_ns = validate; }

	/// preserve cdata, preserves CDATA sections instead of converting them
	/// into text nodes.
	bool preserves_cdata() const							{ return m_preserve_cdata; }

	/// \brief if \a p is true, the CDATA sections will be preserved when parsing XML, if \a p is false, the content of the CDATA will be treated as text
	void set_preserve_cdata(bool p)							{ m_preserve_cdata = p; }

	/// \brief collapse means replacing e.g. `<foo></foo>` with `<foo/>`
	bool collapses_empty_tags() const						{ return m_fmt.collapse_tags; }

	/// \brief if \a c is true, empty tags will be replaced, i.e. write `<foo/>` instead of `<foo></foo>`
	void set_collapse_empty_tags(bool c)					{ m_fmt.collapse_tags = c; }

	/// \brief collapse 'empty elements' according to HTML rules
	bool write_html() const									{ return m_fmt.html; }

	/// \brief if \a c is true, 'empty elements' will be collapsed according to HTML rules
	void set_write_html(bool f)								{ m_fmt.html = f; }

	/// \brief whether to write out comments
	bool suppresses_comments() const						{ return m_fmt.suppress_comments; }

	/// \brief if \a s is true, comments will not be written
	void set_suppress_comments(bool s)						{ m_fmt.suppress_comments = s; }

	/// \brief whether to escape white space
	bool escapes_white_space() const						{ return m_fmt.escape_white_space; }

	/// \brief if \a e is true, white space will be written as XML entities
	void set_escape_white_space(bool e)						{ m_fmt.escape_white_space = e; }

	/// \brief whether to escape double quotes
	bool escapes_double_quote() const						{ return m_fmt.escape_double_quote; }

	/// \brief if \a e is true, double quotes will be written as &quot;
	void set_escape_double_quote(bool e)					{ m_fmt.escape_double_quote = e; }

	/// \brief whether to place a newline after a prolog
	bool wraps_prolog() const								{ return m_wrap_prolog; }

	/// \brief if \a w is true, a newline will be written after the XML prolog
	void set_wrap_prolog(bool w)							{ m_wrap_prolog = w; }

	/// \brief Get the doctype as parsed
	doc_type get_doctype() const							{ return m_doctype; }

	/// \brief Set the doctype to write out
	void set_doctype(const std::string& root, const std::string& pubid, const std::string& dtd)
	{
		set_doctype({root, pubid, dtd});
	}

	/// Set the doctype to write out
	void set_doctype(const doc_type& doctype)				{ m_doctype = doctype; m_write_doctype = true; }

	/// \brief whether to write a XML prolog
	bool writes_xml_decl() const							{ return m_write_xml_decl; }

	/// \brief if \a w is true, an XML prolog will be written
	void set_write_xml_decl(bool w)							{ m_write_xml_decl = w; }

	/// \brief whether to write a DOCTYPE
	bool writes_doctype() const								{ return m_write_doctype; }

	/// \brief if \a f is true a DOCTYPE will be written
	void set_write_doctype(bool f)							{ m_write_doctype = f; }

	/// \brief Check the doctype to see if this is supposed to be HTML5
	bool is_html5() const;

	/// \brief Write out the document
	friend std::ostream& operator<<(std::ostream& os, const document& doc);

	/// \brief Read in a document
	friend std::istream& operator>>(std::istream& is, document& doc);

	/// \brief Serialization support
	template <typename T>
	void serialize(const char* name, const T& data); ///< Serialize \a data into a document containing \a name as root node

	/// \brief Serialization support
	template <typename T>
	void deserialize(const char* name, T& data); ///< Deserialize root node with name \a name into \a data.

	/// Compare two xml documents
	bool operator==(const document& doc) const;
	bool operator!=(const document& doc) const { return not operator==(doc); }

	/// If you want to validate the document using DTD files stored on disk, you can specifiy this directory prior to reading
	/// the document.
	void set_base_dir(const std::string& path);

	/// If you want to be able to load external documents other than trying to read them from disk
	/// you can set a callback here.
	template<typename Callback>
	void set_entity_loader(Callback&& cb)
	{
		m_external_entity_ref_loader = cb;
	}

	encoding_type get_encoding() const;   ///< The text encoding as detected in the input.
	void set_encoding(encoding_type enc); ///< The text encoding to use for output

	float get_version() const;			///< XML version, should be either 1.0 or 1.1
	void set_version(float v);			///< XML version, should be either 1.0 or 1.1

	virtual element* root()					{ return this; }
	virtual const element* root() const		{ return this; }

	virtual node* child()					{ return empty() ? nullptr : &front(); }
	virtual const node* child() const		{ return empty() ? nullptr : &front(); }

  protected:

	virtual node_iterator insert_impl(const_iterator pos, node* n);

	void XmlDeclHandler(encoding_type encoding, bool standalone, float version);
	void StartElementHandler(const std::string& name, const std::string& uri, const parser::attr_list_type& atts);
	void EndElementHandler(const std::string& name, const std::string& uri);
	void CharacterDataHandler(const std::string& data);
	void ProcessingInstructionHandler(const std::string& target, const std::string& data);
	void CommentHandler(const std::string& comment);
	void StartCdataSectionHandler();
	void EndCdataSectionHandler();
	void StartNamespaceDeclHandler(const std::string& prefix, const std::string& uri);
	void EndNamespaceDeclHandler(const std::string& prefix);
	void DoctypeDeclHandler(const std::string& root, const std::string& publicId, const std::string& uri);
	void NotationDeclHandler(const std::string& name, const std::string& sysid, const std::string& pubid);

	std::istream* external_entity_ref(const std::string& base, const std::string& pubid, const std::string& sysid);
	void parse(std::istream& data);

	// /// \brief To read a document and process elements on the go, use this streaming input function.
	// /// If the \a proc callback retuns false, processing is terminated. The \a doc_root parameter of
	// /// the callback is the leading xml up to the first element.
	// void process_document_elements(std::istream& data, const std::string& element_xpath,
	// 							std::function<bool(node* doc_root, element* e)> cb);

	/// The default for libzeep is to locate the external reference based
	/// on sysid and base_dir. Only local files are loaded this way.
	/// You can specify a entity loader here if you want to be able to load
	/// DTD files from another source.
	std::function<std::istream *(const std::string& base, const std::string& pubid, const std::string& sysid)>
		m_external_entity_ref_loader;

	virtual void write(std::ostream& os, format_info fmt) const;

	std::string m_dtd_dir;

	// some content information
	doc_type m_doctype;
	bool m_validating;
	bool m_validating_ns = false;
	bool m_preserve_cdata;
	bool m_has_xml_decl;
	encoding_type m_encoding;
	float m_version;
	bool m_standalone;
	bool m_wrap_prolog = true;
	bool m_write_doctype = false;
	bool m_write_xml_decl = false;

	format_info m_fmt;

	struct notation
	{
		std::string m_name;
		std::string m_sysid;
		std::string m_pubid;
	};

	element* m_cur = nullptr; // construction
	cdata* m_cdata = nullptr; // only defined in a CDATA section
	std::vector<std::pair<std::string, std::string>> m_namespaces;
	std::list<notation> m_notations;
	size_t m_root_size_at_first_notation = 0;	// for processing instructions that occur before a notation
};

namespace literals
{

/// \brief User-defined literal with suffix `_xml`, returns a zeep::xml::document
/// for the string literal \a text which is \a length characters long.
/// NOTE(review): presumably \a text is parsed as XML, the definition is not in this header — confirm.
document operator""_xml(const char* text, size_t length);

}

template <typename T>
void document::serialize(const char* name, const T& data)
{
	serializer sr(*this);
	sr.serialize_element(name, data);
}

template <typename T>
void document::deserialize(const char* name, T& data)
{
	if (child() == nullptr)
		throw zeep::exception("empty document");

	if (child()->name() != name)
		throw zeep::exception("root mismatch");

	deserializer sr(*this);
	sr.deserialize_element(name, data);
}

} // namespace zeep::xml