1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
|
Index: qtbase-opensource-src-5.15.2+dfsg/src/corelib/serialization/qxmlstream.cpp
===================================================================
--- qtbase-opensource-src-5.15.2+dfsg.orig/src/corelib/serialization/qxmlstream.cpp 2024-04-25 08:25:46.389994553 +0200
+++ qtbase-opensource-src-5.15.2+dfsg/src/corelib/serialization/qxmlstream.cpp 2024-04-25 08:25:46.385994556 +0200
@@ -160,7 +160,7 @@
addData() or by waiting for it to arrive on the device().
\value UnexpectedElementError The parser encountered an element
- that was different to those it expected.
+ or token that was different to those it expected.
*/
@@ -295,13 +295,34 @@
QXmlStreamReader is a well-formed XML 1.0 parser that does \e not
include external parsed entities. As long as no error occurs, the
- application code can thus be assured that the data provided by the
- stream reader satisfies the W3C's criteria for well-formed XML. For
- example, you can be certain that all tags are indeed nested and
- closed properly, that references to internal entities have been
- replaced with the correct replacement text, and that attributes have
- been normalized or added according to the internal subset of the
- DTD.
+ application code can thus be assured, that
+ \list
+ \li the data provided by the stream reader satisfies the W3C's
+ criteria for well-formed XML,
+ \li tokens are provided in a valid order.
+ \endlist
+
+ Unless QXmlStreamReader raises an error, it guarantees the following:
+ \list
+ \li All tags are nested and closed properly.
+ \li References to internal entities have been replaced with the
+ correct replacement text.
+ \li Attributes have been normalized or added according to the
+ internal subset of the \l DTD.
+ \li Tokens of type \l StartDocument happen before all others,
+ aside from comments and processing instructions.
+ \li At most one DOCTYPE element (a token of type \l DTD) is present.
+ \li If present, the DOCTYPE appears before all other elements,
+ aside from StartDocument, comments and processing instructions.
+ \endlist
+
+ In particular, once any token of type \l StartElement, \l EndElement,
+ \l Characters, \l EntityReference or \l EndDocument is seen, no
+ tokens of type StartDocument or DTD will be seen. If one is present in
+ the input stream, out of order, an error is raised.
+
+ \note The token types \l Comment and \l ProcessingInstruction may appear
+ anywhere in the stream.
If an error occurs while parsing, atEnd() and hasError() return
true, and error() returns the error that occurred. The functions
@@ -620,6 +641,7 @@
d->token = -1;
return readNext();
}
+ d->checkToken();
return d->type;
}
@@ -740,6 +762,14 @@
};
+static const char QXmlStreamReader_XmlContextString[] =
+ "Prolog\0"
+ "Body\0";
+
+static const short QXmlStreamReader_XmlContextString_indices[] = {
+ 0, 7
+};
+
/*!
\property QXmlStreamReader::namespaceProcessing
The namespace-processing flag of the stream reader
@@ -775,6 +805,16 @@
QXmlStreamReader_tokenTypeString_indices[d->type]);
}
+/*!
+ \internal
+ \return \param ctxt (Prolog/Body) as a string.
+ */
+QString contextString(QXmlStreamReaderPrivate::XmlContext ctxt)
+{
+ return QLatin1String(QXmlStreamReader_XmlContextString +
+ QXmlStreamReader_XmlContextString_indices[static_cast<int>(ctxt)]);
+}
+
#endif // QT_NO_XMLSTREAMREADER
QXmlStreamPrivateTagStack::QXmlStreamPrivateTagStack()
@@ -866,6 +906,8 @@
type = QXmlStreamReader::NoToken;
error = QXmlStreamReader::NoError;
+ currentContext = XmlContext::Prolog;
+ foundDTD = false;
}
/*
@@ -4059,6 +4101,92 @@
}
}
+static bool isTokenAllowedInContext(QXmlStreamReader::TokenType type,
+ QXmlStreamReaderPrivate::XmlContext loc)
+{
+ switch (type) {
+ case QXmlStreamReader::StartDocument:
+ case QXmlStreamReader::DTD:
+ return loc == QXmlStreamReaderPrivate::XmlContext::Prolog;
+
+ case QXmlStreamReader::StartElement:
+ case QXmlStreamReader::EndElement:
+ case QXmlStreamReader::Characters:
+ case QXmlStreamReader::EntityReference:
+ case QXmlStreamReader::EndDocument:
+ return loc == QXmlStreamReaderPrivate::XmlContext::Body;
+
+ case QXmlStreamReader::Comment:
+ case QXmlStreamReader::ProcessingInstruction:
+ return true;
+
+ case QXmlStreamReader::NoToken:
+ case QXmlStreamReader::Invalid:
+ return false;
+ default:
+ return false;
+ }
+}
+
+/*!
+ \internal
+ \brief QXmlStreamReader::isValidToken
+ \return \c true if \param type is a valid token type.
+ \return \c false if \param type is an unexpected token,
+ which indicates a non-well-formed or invalid XML stream.
+ */
+bool QXmlStreamReaderPrivate::isValidToken(QXmlStreamReader::TokenType type)
+{
+ // Don't change currentContext, if Invalid or NoToken occur in the prolog
+ if (type == QXmlStreamReader::Invalid || type == QXmlStreamReader::NoToken)
+ return false;
+
+ // If a token type gets rejected in the body, there is no recovery
+ const bool result = isTokenAllowedInContext(type, currentContext);
+ if (result || currentContext == XmlContext::Body)
+ return result;
+
+ // First non-Prolog token observed => switch context to body and check again.
+ currentContext = XmlContext::Body;
+ return isTokenAllowedInContext(type, currentContext);
+}
+
+/*!
+ \internal
+ Checks token type and raises an error, if it is invalid
+ in the current context (prolog/body).
+ */
+void QXmlStreamReaderPrivate::checkToken()
+{
+ Q_Q(QXmlStreamReader);
+
+ // The token type must be consumed, to keep track if the body has been reached.
+ const XmlContext context = currentContext;
+ const bool ok = isValidToken(type);
+
+ // Do nothing if an error has been raised already (going along with an unexpected token)
+ if (error != QXmlStreamReader::Error::NoError)
+ return;
+
+ if (!ok) {
+ raiseError(QXmlStreamReader::UnexpectedElementError,
+ QLatin1String("Unexpected token type %1 in %2.")
+ .arg(q->tokenString(), contextString(context)));
+ return;
+ }
+
+ if (type != QXmlStreamReader::DTD)
+ return;
+
+ // Raise error on multiple DTD tokens
+ if (foundDTD) {
+ raiseError(QXmlStreamReader::UnexpectedElementError,
+ QLatin1String("Found second DTD token in %1.").arg(contextString(context)));
+ } else {
+ foundDTD = true;
+ }
+}
+
/*!
\fn bool QXmlStreamAttributes::hasAttribute(const QString &qualifiedName) const
\since 4.5
Index: qtbase-opensource-src-5.15.2+dfsg/src/corelib/serialization/qxmlstream_p.h
===================================================================
--- qtbase-opensource-src-5.15.2+dfsg.orig/src/corelib/serialization/qxmlstream_p.h 2024-04-25 08:25:46.389994553 +0200
+++ qtbase-opensource-src-5.15.2+dfsg/src/corelib/serialization/qxmlstream_p.h 2024-04-25 08:25:46.385994556 +0200
@@ -804,6 +804,17 @@
#endif
bool atEnd;
+ enum class XmlContext
+ {
+ Prolog,
+ Body,
+ };
+
+ XmlContext currentContext = XmlContext::Prolog;
+ bool foundDTD = false;
+ bool isValidToken(QXmlStreamReader::TokenType type);
+ void checkToken();
+
/*!
\sa setType()
*/
|