File: uri.hpp

package info (click to toggle)
libzeep 7.3.2-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,372 kB
  • sloc: cpp: 17,430; javascript: 180; makefile: 12; sh: 11
file content (391 lines) | stat: -rw-r--r-- 10,187 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
//          Copyright Maarten L. Hekkelman, 2021-2026
//   Distributed under the Boost Software License, Version 1.0.
//      (See accompanying file LICENSE_1_0.txt or copy at
//            http://www.boost.org/LICENSE_1_0.txt)

#pragma once

// A simple uri class.

#include "zeep/exception.hpp"
#include "zeep/unicode-support.hpp"

#include <cstdint>

namespace zeep
{

// --------------------------------------------------------------------

/// \brief Check whether the string in \a uri is a valid URI
/// \param uri		The URI to check
/// \return		true if \a uri is valid, false otherwise
bool is_valid_uri(const std::string &uri);

/// \brief Check whether the URI in \a uri is fully qualified, i.e. has a scheme and path
/// \param uri		The URI to check
/// \return		true if \a uri is fully qualified, false otherwise
bool is_fully_qualified_uri(const std::string &uri);

/// \brief Check whether \a host is of the form HOST:PORT as required by CONNECT
///        (presumably the HTTP CONNECT method, confirm against the caller)
/// \param host		The host string to check
/// \return		true if \a host has the required form, false otherwise
bool is_valid_connect_host(std::string_view host);

// --------------------------------------------------------------------

/// \brief Decode a URL using the RFC rules
///
/// NOTE(review): the RFC is not named here; presumably RFC 3986
/// percent-decoding, as referenced by zeep::uri. Confirm in the implementation.
/// \param s  The URL that needs to be decoded
/// \return	  The decoded URL, as a new string
std::string decode_url(std::string_view s);

/// \brief Encode a URL using the RFC rules
///
/// NOTE(review): the RFC is not named here; presumably RFC 3986
/// percent-encoding, as referenced by zeep::uri. Confirm in the implementation.
/// This function is also declared a friend of zeep::uri.
/// \param s  The URL that needs to be encoded
/// \return	  The encoded URL, as a new string
std::string encode_url(std::string_view s);

// --------------------------------------------------------------------

/// \brief Exception type thrown by libzeep when the uri constructor is
///        handed a string that is not a valid uri.
class uri_parse_error : public zeep::exception
{
  public:
	/// \brief Construct with the generic message "invalid uri"
	uri_parse_error()
		: exception("invalid uri")
	{
	}

	/// \brief Construct with a message that also names the offending uri
	/// \param u	The text that could not be parsed; appended to the message
	uri_parse_error(const std::string &u)
		: exception("invalid uri: " + u)
	{
	}
};

// --------------------------------------------------------------------

/// \brief A class modelling a URI based on RFC 3986 https://www.rfc-editor.org/rfc/rfc3986
///
/// All components are stored separately. Scheme and host are converted to lower case.
/// Path segments are stored decoded whereas query and fragment are stored encoded.
/// This avoids double encoding and eases post processing of, e.g., queries.
class uri
{
  public:
	/// \brief constructor for an empty uri
	uri() = default;

	/// \brief constructor that parses the URI in \a s, throws exception if not valid
	uri(const std::string &s);

	/// \brief constructor that parses the URI in \a s, throws exception if not valid
	uri(const char *s);

	/// \brief constructor that parses the URI in \a s relative to the baseuri in \a base, throws exception if not valid
	uri(const std::string &s, const uri &base);

	/// \brief constructor taking two iterators into path segments, for a relative path
	///
	/// Every element in the range [\a b, \a e) is appended as one path segment.
	/// No other component is set, so the absolute-path flag keeps its default (false).
	/// Only participates in overload resolution when a std::string can be
	/// constructed from the iterator's nested value_type.
	template <typename InputIterator>
	uri(InputIterator b, InputIterator e)
		requires(std::is_constructible_v<std::string, typename InputIterator::value_type>)
		: uri()
	{
		for (auto i = b; i != e; ++i)
			m_path.emplace_back(*i);
	}

	~uri() = default;

	/// \brief copy constructor, member-wise copy
	uri(const uri &u) = default;

	/// \brief move constructor
	///
	/// The members of the new object start out with their default
	/// initialisers and are then exchanged with those of \a u through swap.
	uri(uri &&u) noexcept
	{
		swap(*this, u);
	}

	/// \brief assignment, for both copy and move
	///
	/// The argument is taken by value and then swapped into *this.
	uri &operator=(uri u) noexcept
	{
		swap(*this, u);
		return *this;
	}

	/// \brief exchange the contents of \a lhs and \a rhs (defined out of line)
	friend void swap(uri &lhs, uri &rhs) noexcept;

	// --------------------------------------------------------------------

	/// \brief Return true if the scheme is not empty
	[[nodiscard]] bool has_scheme() const
	{
		return not m_scheme.empty();
	}

	/// \brief Return true if any of userinfo, host or port is set (port is considered set when non-zero)
	[[nodiscard]] bool has_authority() const
	{
		return not(m_userinfo.empty() and m_host.empty() and m_port == 0);
	}

	/// \brief Return true if there is at least one path segment
	[[nodiscard]] bool has_path() const
	{
		return not m_path.empty();
	}

	/// \brief Return true if the query is not empty
	[[nodiscard]] bool has_query() const
	{
		return not m_query.empty();
	}

	/// \brief Return true if the fragment is not empty
	[[nodiscard]] bool has_fragment() const
	{
		return not m_fragment.empty();
	}

	/// \brief Return true if the uri is empty, i.e. has no scheme, authority, path, query or fragment
	[[nodiscard]] bool empty() const
	{
		return not(
			has_scheme() or has_authority() or has_path() or has_query() or has_fragment());
	}

	/// \brief Return true if the path is absolute
	[[nodiscard]] bool is_absolute() const
	{
		return m_absolutePath;
	}

	/// \brief Return the scheme
	[[nodiscard]] const std::string &get_scheme() const
	{
		return m_scheme;
	}

	/// \brief Set the scheme to \a scheme, the stored value is converted to lower case
	void set_scheme(std::string scheme)
	{
		m_scheme = std::move(scheme);
		zeep::to_lower(m_scheme);
	}

	/// \brief Return the user info
	[[nodiscard]] const std::string &get_userinfo() const
	{
		return m_userinfo;
	}

	/// \brief Set the userinfo to \a userinfo, stored as is
	void set_userinfo(std::string userinfo)
	{
		m_userinfo = std::move(userinfo);
	}

	/// \brief Return the host
	[[nodiscard]] const std::string &get_host() const
	{
		return m_host;
	}

	/// \brief Set the host to \a host, the stored value is converted to lower case
	void set_host(std::string host)
	{
		m_host = std::move(host);
		zeep::to_lower(m_host);
	}

	/// \brief Return the port, zero when no port was set
	[[nodiscard]] uint16_t get_port() const
	{
		return m_port;
	}

	/// \brief Set the port to \a port
	void set_port(uint16_t port)
	{
		m_port = port;
	}

	/// \brief Return a uri containing only the path
	[[nodiscard]] uri get_path() const;

	/// \brief Get the individual segments of the path
	[[nodiscard]] const std::vector<std::string> &get_segments() const
	{
		return m_path;
	}

	/// \brief Set the path to \a path
	void set_path(const std::string &path);

	/// \brief Return the query
	/// \param decoded	If true the stored query is passed through decode_url first,
	///					otherwise it is returned exactly as stored
	[[nodiscard]] std::string get_query(bool decoded) const
	{
		return decoded ? decode_url(m_query) : m_query;
	}

	/// \brief Set the query to \a query and optionally encode it based on \a encode
	void set_query(std::string query, bool encode);

	/// \brief Return the fragment
	/// \param decoded	If true the stored fragment is passed through decode_url first,
	///					otherwise it is returned exactly as stored
	[[nodiscard]] std::string get_fragment(bool decoded) const
	{
		return decoded ? decode_url(m_fragment) : m_fragment;
	}

	/// \brief Set the fragment to \a fragment and optionally encode it based on \a encode
	void set_fragment(std::string fragment, bool encode);

	/// \brief Return the uri as a string
	[[nodiscard]] std::string string() const;

	/// \brief Return the uri as a string, without encoded characters
	[[nodiscard]] std::string unencoded_string() const;

	/// \brief Write the uri in \a u to the stream \a os, in encoded form
	friend std::ostream &operator<<(std::ostream &os, const uri &u)
	{
		u.write(os, true);
		return os;
	}

	/// \brief Extend path
	uri &operator/=(const uri &rhs);

	/// \brief Extend path
	///
	/// Returns \a lhs (taken by value) after applying operator/= with \a rhs.
	friend uri operator/(uri lhs, const uri &rhs)
	{
		// Return the by-value parameter by name so that it is moved into the
		// result; returning the uri& yielded by operator/= would force a copy.
		// NOTE(review): assumes operator/= returns *this, as is conventional.
		lhs /= rhs;
		return lhs;
	}

	/// \brief Comparison, true if every stored component and the absolute-path flag are equal
	[[nodiscard]] bool operator==(const uri &rhs) const
	{
		return m_scheme == rhs.m_scheme and
		       m_userinfo == rhs.m_userinfo and
		       m_host == rhs.m_host and
		       m_port == rhs.m_port and
		       m_path == rhs.m_path and
		       m_query == rhs.m_query and
		       m_fragment == rhs.m_fragment and
		       m_absolutePath == rhs.m_absolutePath;
	}

	/// \brief return the uri relative from \a base.
	///
	/// If the scheme and authority of this and \a base match,
	/// a relative uri will be returned with the path
	/// of base removed from this path.
	/// NOTE(review): "match" fills in a verb that was missing from the
	/// original comment, confirm against the implementation.
	[[nodiscard]] uri relative(const uri &base) const;

  private:
	/// Bit flags describing the classes an ASCII character belongs to,
	/// the names follow the grammar of RFC 3986.
	enum class char_class : uint8_t
	{
		gen_delim = 1 << 0,
		sub_delim = 1 << 1,
		reserved = gen_delim | sub_delim,
		unreserved = 1 << 2,
		scheme = 1 << 3,
		hexdigit = 1 << 4,
		alpha = 1 << 5
	};

	/// Lookup table with 128 entries, indexed by 7-bit ASCII code. Each entry
	/// is the bitwise OR of the char_class flags that apply to that character.
	static constexpr uint8_t kCharClassTable[] = {
		// clang-format off
		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
		 0,  2,  0,  1,  2,  0,  2,  2,  2,  2,  2, 10,  2, 12, 12,  1,
		28, 28, 28, 28, 28, 28, 28, 28, 28, 28,  1,  2,  0,  2,  0,  1,
		 1, 60, 60, 60, 60, 60, 60, 44, 44, 44, 44, 44, 44, 44, 44, 44,
		44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44,  1,  0,  1,  0,  4,
		 0, 60, 60, 60, 60, 60, 60, 44, 44, 44, 44, 44, 44, 44, 44, 44,
		44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44,  0,  0,  0,  4,  0,
		// clang-format on
	};

  public:
	/// \brief Return true if \a ch belongs to (any of) the classes in \a mask
	///
	/// Values outside the range 1..127 are never a member of any class, this
	/// includes NUL and negative values resulting from a signed char.
	static inline constexpr bool is_char_class(int ch, char_class mask)
	{
		// Mask with the enum's own underlying type to keep the arithmetic unsigned.
		return ch > 0 and ch < 128 and (kCharClassTable[static_cast<uint8_t>(ch)] bitand static_cast<uint8_t>(mask)) != 0;
	}

	/// \brief Return true if \a ch is a gen-delim character
	static inline constexpr bool is_gen_delim(int ch)
	{
		return is_char_class(ch, char_class::gen_delim);
	}

	/// \brief Return true if \a ch is a sub-delim character
	static inline constexpr bool is_sub_delim(int ch)
	{
		return is_char_class(ch, char_class::sub_delim);
	}

	/// \brief Return true if \a ch is a reserved character, i.e. a gen-delim or a sub-delim
	static inline constexpr bool is_reserved(int ch)
	{
		return is_char_class(ch, char_class::reserved);
	}

	/// \brief Return true if \a ch is an unreserved character
	static inline constexpr bool is_unreserved(int ch)
	{
		return is_char_class(ch, char_class::unreserved);
	}

	/// \brief Return true if \a ch may start a scheme, i.e. is an ASCII letter
	static inline constexpr bool is_scheme_start(int ch)
	{
		return is_char_class(ch, char_class::alpha);
	}

	/// \brief Return true if \a ch may be part of a scheme
	static inline constexpr bool is_scheme(int ch)
	{
		return is_char_class(ch, char_class::scheme);
	}

	/// \brief Return true if \a ch is a hexadecimal digit
	static inline constexpr bool is_xdigit(int ch)
	{
		return is_char_class(ch, char_class::hexdigit);
	}

	friend std::string encode_url(std::string_view s);

  private:
	// --------------------------------------------------------------------
	// The helpers below use no instance state and are therefore static.

	/// \brief Return true if \a cp points at a percent-encoded triplet ('%' followed by two hex digits)
	///
	/// On a match \a cp is advanced by two, leaving it at the last hex digit,
	/// so the caller only has to step over one more character. Without a
	/// match \a cp is left untouched.
	static bool is_pct_encoded(const char *&cp)
	{
		bool result = false;
		// is_xdigit rejects NUL and evaluation short-circuits, so nothing is
		// read past the terminator of a NUL-terminated string.
		if (*cp == '%' and is_xdigit(cp[1]) and is_xdigit(cp[2]))
		{
			result = true;
			cp += 2;
		}
		return result;
	}

	/// \brief Return true if \a cp points at a character allowed in userinfo
	///
	/// May advance \a cp, see is_pct_encoded.
	static bool is_userinfo(const char *&cp)
	{
		return is_unreserved(*cp) or is_sub_delim(*cp) or *cp == ':' or is_pct_encoded(cp);
	}

	/// \brief Return true if \a cp points at a character allowed in a reg-name
	///
	/// May advance \a cp, see is_pct_encoded.
	static bool is_reg_name(const char *&cp)
	{
		return is_unreserved(*cp) or is_sub_delim(*cp) or is_pct_encoded(cp);
	}

	/// \brief Return true if \a cp points at a pchar, a character allowed in a path segment
	///
	/// May advance \a cp, see is_pct_encoded.
	static bool is_pchar(const char *&cp)
	{
		return is_unreserved(*cp) or is_sub_delim(*cp) or *cp == ':' or *cp == '@' or is_pct_encoded(cp);
	}

	// Parsing and normalisation routines, all defined out of line.
	void parse(const char *s);
	void transform(const uri &base);
	void remove_dot_segments();

	const char *parse_scheme(const char *ch);
	const char *parse_authority(const char *ch);
	const char *parse_host(const char *ch);
	const char *parse_hierpart(const char *ch);
	const char *parse_segment(const char *ch);
	const char *parse_segment_nz(const char *ch);
	const char *parse_segment_nz_nc(const char *ch);

	/// Write this uri to \a os; operator<< passes true for \a encoded.
	void write(std::ostream &os, bool encoded) const;

	// --------------------------------------------------------------------

	std::string m_scheme;            ///< converted to lower case by set_scheme
	std::string m_userinfo;
	std::string m_host;              ///< converted to lower case by set_host
	uint16_t m_port = 0;             ///< zero counts as "no port" in has_authority
	std::vector<std::string> m_path; ///< the individual path segments
	std::string m_query;
	std::string m_fragment;
	bool m_absolutePath = false;
};

} // namespace zeep