1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183
|
/************************************************************************
**
** Copyright (C) 2015-2020 Kevin B. Hendricks Stratford, ON, Canada
** Copyright (C) 2012 John Schember <john@nachtimwald.com>
**
** This file is part of PageEdit.
**
** PageEdit is free software: you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation, either version 3 of the License, or
** (at your option) any later version.
**
** PageEdit is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with PageEdit. If not, see <http://www.gnu.org/licenses/>.
**
*************************************************************************/
#ifndef GUMBO_INTERFACE
#define GUMBO_INTERFACE
#include <stdlib.h>
#include <string>
#include <unordered_set>
#include "gumbo.h"
#include "gumbo_edit.h"
#include <QString>
#include <QList>
#include <QHash>
class QString;
struct GumboWellFormedError {
int line;
int column;
QString message;
};
class GumboInterface
{
public:
GumboInterface(const QString &source, const QString &version);
GumboInterface(const QString &source, const QString &version, const QHash<QString, QString> &source_updates);
~GumboInterface();
void parse();
void parse_fragment();
QString repair();
QString getxhtml();
QString get_fragment_xhtml();
QString prettyprint(QString indent_chars=" ");
// returns list tags that match manifest properties
QStringList get_all_properties();
// returns "html" node
GumboNode * get_root_node();
// return document node
GumboNode * get_document_node();
// returns body node or NULL if none exists
GumboNode * get_body_node();
// routines for working with gumbo paths
GumboNode* get_node_from_path(QList<unsigned int> & apath);
QList<unsigned int> get_path_to_node(GumboNode* node);
// routines for working with qwebpaths
GumboNode* get_node_from_qwebpath(QString webpath);
QString get_qwebpath_to_node(GumboNode* node);
// routines for updating while serializing (see SourceUpdates and AnchorUpdates
QString perform_source_updates(const QString & my_current_book_relpath, const QString& newbookpath);
QString perform_style_updates(const QString & my_current_book_relpath, const QString& newbookpath);
QString perform_link_updates(const QString & newlinks);
QString get_body_contents();
QString perform_body_updates(const QString & new_body);
// routines for working with nodes with specific attributes
QList<GumboNode*> get_all_nodes_with_attribute(const QString & attname);
QStringList get_all_values_for_attribute(const QString & attname);
QHash<QString,QString> get_attributes_of_node(GumboNode* node);
// routines for working with nodes with specific tags
QList<GumboNode*> get_all_nodes_with_tag(GumboTag tag);
QList<GumboNode*> get_all_nodes_with_tags(const QList<GumboTag> & tags);
// utility routines
std::string get_tag_name(GumboNode *node);
QString get_local_text_of_node(GumboNode* node);
QString get_body_text();
// routine to check if well-formed
QList<GumboWellFormedError> error_check();
QList<GumboWellFormedError> fragment_error_check();
// routines to work with node and its children only
QList<GumboNode*> get_nodes_with_attribute(GumboNode* node, const char * att_name);
QList<GumboNode*> get_nodes_with_tags(GumboNode* node, const QList<GumboTag> & tags);
QList<GumboNode*> get_nodes_with_comments(GumboNode * node);
QList<GumboNode*> get_element_nodes_with_prefix(GumboNode * node, const std::string& prefix);
private:
enum UpdateTypes {
NoUpdates = 0,
SourceUpdates = 1 << 0,
LinkUpdates = 1 << 1,
BodyUpdates = 1 << 2,
StyleUpdates = 1 << 3
};
QStringList get_properties(GumboNode* node);
QStringList get_values_for_attr(GumboNode* node, const char* attr_name);
std::string serialize(GumboNode* node, enum UpdateTypes doupdates = NoUpdates);
std::string serialize_contents(GumboNode* node, enum UpdateTypes doupdates = NoUpdates);
std::string prettyprint(GumboNode* node, int lvl, const std::string indent_chars);
std::string prettyprint_contents(GumboNode* node, int lvl, const std::string indent_chars);
std::string build_doctype(GumboNode *node);
std::string get_attribute_name(GumboAttribute * at);
std::string build_attributes(GumboAttribute * at, bool no_entities, bool run_src_updates = false, bool run_style_updates = false);
std::string update_attribute_value(const std::string &href);
std::string update_style_urls(const std::string& source);
std::string substitute_xml_entities_into_text(const std::string &text);
std::string substitute_xml_entities_into_attributes(char quote, const std::string &text);
bool in_set(std::unordered_set<std::string> &s, std::string &key);
void rtrim(std::string &s);
void ltrim(std::string &s);
void ltrimnewlines(std::string &s);
void newlinetrim(std::string &s);
void condense_whitespace(std::string &s);
void replace_all(std::string &s, const char * s1, const char * s2);
// Hopefully now unneeded
// QString fix_self_closing_tags(const QString & source);
QString m_source;
GumboOutput* m_output;
std::string m_utf8src;
const QHash<QString, QString> & m_sourceupdates;
std::string m_newcsslinks;
QString m_currentbkpath;
QString m_currentdir;
std::string m_newbody;
QString m_version;
QString m_newbookpath;
};
#endif
|