1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
|
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#ifndef INCLUDED_ORCUS_SAX_TOKEN_PARSER_HPP
#define INCLUDED_ORCUS_SAX_TOKEN_PARSER_HPP
#include "sax_ns_parser.hpp"
#include "types.hpp"
#include <vector>
#include <algorithm>
#include <functional>
namespace orcus {
class tokens;
class ORCUS_PSR_DLLPUBLIC sax_token_handler_wrapper_base
{
protected:
xml_declaration_t m_declaration;
xml_token_element_t m_elem;
const tokens& m_tokens;
xml_token_t tokenize(std::string_view name) const;
void set_element(const sax_ns_parser_element& elem);
public:
sax_token_handler_wrapper_base(const tokens& _tokens);
void attribute(std::string_view name, std::string_view val);
void attribute(const sax_ns_parser_attribute& attr);
};
/**
 * Handler whose callbacks all do nothing. It lists the member functions
 * that sax_token_parser invokes on its handler type.
 */
class sax_token_handler
{
public:
    /**
     * Invoked once the XML declaration has been parsed in its entirety.
     *
     * @param decl struct holding the attributes of the XML declaration.
     */
    void declaration([[maybe_unused]] const orcus::xml_declaration_t& decl) {}

    /**
     * Invoked when an element is opened.
     *
     * @param elem struct holding the element's information together with
     *             the attributes that belong to the element.
     */
    void start_element([[maybe_unused]] const orcus::xml_token_element_t& elem) {}

    /**
     * Invoked when an element is closed.
     *
     * @param elem struct holding the information of the element being
     *             closed.
     */
    void end_element([[maybe_unused]] const orcus::xml_token_element_t& elem) {}

    /**
     * Invoked for every segment of text content.  A text content is always
     * a direct child of an element.  One element may yield several text
     * segments, either because it also has child elements that are
     * siblings of the text, or because the text is split by a comment.
     *
     * @param val value of the text content.
     * @param transient when true, the text has been converted and lives in
     *                  a temporary buffer because it contained one or more
     *                  encoded characters.  In that case <em>the value must
     *                  either be converted to a non-text value right away
     *                  or be interned before the callback returns</em>.
     */
    void characters(
        [[maybe_unused]] std::string_view val, [[maybe_unused]] bool transient)
    {}
};
/**
 * SAX parser that turns element and attribute names into tokens while it
 * parses.  Every pre-defined element or attribute name is translated into
 * an integral identifier by means of @ref tokens.  The caller supplies the
 * pre-defined set of element and attribute names when constructing the
 * parser.
 *
 * Internally the work is delegated to @ref sax_ns_parser.
 *
 * @tparam HandlerT Handler type with member functions for event callbacks.
 *         Refer to @ref sax_token_handler.
 */
template<typename HandlerT>
class sax_token_parser
{
public:
    using handler_type = HandlerT;

    sax_token_parser(
        std::string_view content,
        const tokens& _tokens,
        xmlns_context& ns_cxt,
        handler_type& handler);

    ~sax_token_parser() = default;

    void parse();

private:
    /**
     * Adapter that receives the callbacks of the internal sax_ns_parser and
     * forwards them to the matching callbacks of the user handler.
     */
    class handler_wrapper : public sax_token_handler_wrapper_base
    {
        handler_type& m_handler;

    public:
        handler_wrapper(const tokens& _tokens, handler_type& handler) :
            sax_token_handler_wrapper_base(_tokens),
            m_handler(handler)
        {}

        // Events the token parser does not forward to the user handler.
        void doctype(const sax::doctype_declaration&) {}
        void start_declaration(std::string_view) {}

        void end_declaration(std::string_view)
        {
            m_handler.declaration(m_declaration);
            // Drop the stored attributes once the handler has been notified.
            m_elem.attrs.clear();
        }

        void start_element(const sax_ns_parser_element& elem)
        {
            set_element(elem);
            m_handler.start_element(m_elem);
            // Drop the stored attributes once the handler has been notified.
            m_elem.attrs.clear();
        }

        void end_element(const sax_ns_parser_element& elem)
        {
            set_element(elem);
            m_handler.end_element(m_elem);
        }

        void characters(std::string_view val, bool transient)
        {
            m_handler.characters(val, transient);
        }
    };

    // m_wrapper has to be declared ahead of m_parser: the constructor hands
    // m_wrapper to m_parser, and members are initialized in declaration order.
    handler_wrapper m_wrapper;
    sax_ns_parser<handler_wrapper> m_parser;
};
/**
 * Build the handler wrapper from the token set and the user handler, then
 * build the internal namespace-aware parser on top of that wrapper.
 *
 * @param content XML content handed to the internal parser.
 * @param _tokens pre-defined set of element and attribute names.
 * @param ns_cxt namespace context handed to the internal parser.
 * @param handler user handler that receives the tokenized events.
 */
template<typename HandlerT>
sax_token_parser<HandlerT>::sax_token_parser(
    std::string_view content,
    const tokens& _tokens,
    xmlns_context& ns_cxt,
    handler_type& handler)
    : m_wrapper(_tokens, handler)
    , m_parser(content, ns_cxt, m_wrapper)
{}
/**
 * Run the parse.  This simply forwards to the internal sax_ns_parser,
 * whose callbacks are routed to the user handler through the wrapper.
 */
template<typename HandlerT>
void sax_token_parser<HandlerT>::parse()
{
    m_parser.parse();
}
} // namespace orcus
#endif
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|