1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256
|
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef NS_EXPAT_DRIVER__
#define NS_EXPAT_DRIVER__
#include "expat_config.h"
#include "moz_expat.h"
#include "nsCOMPtr.h"
#include "nsString.h"
#include "nsIDTD.h"
#include "nsIInputStream.h"
#include "nsIParser.h"
#include "nsCycleCollectionParticipant.h"
#include "nsScanner.h"
#include "rlbox_expat.h"
#include "nsRLBoxExpatDriver.h"
#include "mozilla/UniquePtr.h"
class nsIExpatSink;
struct nsCatalogData;
class RLBoxExpatSandboxData;
namespace mozilla {
template <typename, size_t>
class Array;
}
class nsExpatDriver : public nsIDTD {
virtual ~nsExpatDriver();
public:
NS_DECL_CYCLE_COLLECTING_ISUPPORTS_FINAL
NS_DECL_NSIDTD
NS_DECL_CYCLE_COLLECTION_CLASS(nsExpatDriver)
nsExpatDriver();
nsresult Initialize(nsIURI* aURI, nsIContentSink* aSink);
nsresult ResumeParse(nsScanner& aScanner, bool aIsFinalChunk);
int HandleExternalEntityRef(const char16_t* aOpenEntityNames,
const char16_t* aBase, const char16_t* aSystemId,
const char16_t* aPublicId);
static void HandleStartElement(rlbox_sandbox_expat& aSandbox,
tainted_expat<void*> aUserData,
tainted_expat<const char16_t*> aName,
tainted_expat<const char16_t**> aAtts);
static void HandleStartElementForSystemPrincipal(
rlbox_sandbox_expat& aSandbox, tainted_expat<void*> aUserData,
tainted_expat<const char16_t*> aName,
tainted_expat<const char16_t**> aAtts);
static void HandleEndElement(rlbox_sandbox_expat& aSandbox,
tainted_expat<void*> aUserData,
tainted_expat<const char16_t*> aName);
static void HandleEndElementForSystemPrincipal(
rlbox_sandbox_expat& aSandbox, tainted_expat<void*> aUserData,
tainted_expat<const char16_t*> aName);
nsresult HandleCharacterData(const char16_t* aCData, const uint32_t aLength);
nsresult HandleComment(const char16_t* aName);
nsresult HandleProcessingInstruction(const char16_t* aTarget,
const char16_t* aData);
nsresult HandleXMLDeclaration(const char16_t* aVersion,
const char16_t* aEncoding, int32_t aStandalone);
nsresult HandleDefault(const char16_t* aData, const uint32_t aLength);
nsresult HandleStartCdataSection();
nsresult HandleEndCdataSection();
nsresult HandleStartDoctypeDecl(const char16_t* aDoctypeName,
const char16_t* aSysid,
const char16_t* aPubid,
bool aHasInternalSubset);
nsresult HandleEndDoctypeDecl();
private:
// Load up an external stream to get external entity information
nsresult OpenInputStreamFromExternalDTD(const char16_t* aFPIStr,
const char16_t* aURLStr,
nsIURI* aBaseURI,
nsIInputStream** aStream,
nsIURI** aAbsURI);
enum class ChunkOrBufferIsFinal {
None,
FinalChunk,
FinalChunkAndBuffer,
};
/**
* Pass a buffer to Expat. If Expat is blocked aBuffer should be null and
* aLength should be 0. The result of the call will be stored in
* mInternalState. Expat will parse as much of the buffer as it can and store
* the rest in its internal buffer.
*
* @param aBuffer the buffer to pass to Expat. May be null.
* @param aLength the length of the buffer to pass to Expat (in number of
* char16_t's). Must be 0 if aBuffer is null and > 0 if
* aBuffer is not null.
* @param aIsFinal whether this is the last chunk in a row passed to
* ParseChunk, and if so whether it's the last chunk and
* buffer passed to ParseChunk (meaning there will be no more
* calls to ParseChunk for the document being parsed).
* @param aConsumed [out] the number of PRUnichars that Expat consumed. This
* doesn't include the PRUnichars that Expat stored in
* its buffer but didn't parse yet.
* @param aLastLineLength [out] the length of the last line that Expat has
* consumed. This will only be computed if
* aIsFinal is not None or mInternalState is set
* to a failure.
*/
void ParseChunk(const char16_t* aBuffer, uint32_t aLength,
ChunkOrBufferIsFinal aIsFinal, uint32_t* aConsumed,
XML_Size* aLastLineLength);
/**
* Wrapper for ParseBuffer. If the buffer is too large to be copied into the
* sandbox all at once, splits it into chunks and invokes ParseBuffer in a
* loop.
*
* @param aBuffer the buffer to pass to Expat. May be null.
* @param aLength the length of the buffer to pass to Expat (in number of
* char16_t's). Must be 0 if aBuffer is null and > 0 if
* aBuffer is not null.
* @param aIsFinal whether there will definitely not be any more new buffers
* passed in to ParseBuffer
* @param aConsumed [out] the number of PRUnichars that Expat consumed. This
* doesn't include the PRUnichars that Expat stored in
* its buffer but didn't parse yet.
* @param aLastLineLength [out] the length of the last line that Expat has
* consumed.
*/
void ChunkAndParseBuffer(const char16_t* aBuffer, uint32_t aLength,
bool aIsFinal, uint32_t* aPassedToExpat,
uint32_t* aConsumed, XML_Size* aLastLineLength);
nsresult HandleError();
void MaybeStopParser(nsresult aState);
bool BlockedOrInterrupted() {
return mInternalState == NS_ERROR_HTMLPARSER_BLOCK ||
mInternalState == NS_ERROR_HTMLPARSER_INTERRUPTED;
}
// Expat allows us to set the base URI for entities. It doesn't use the base
// URI itself, but just passes it along to all the entity handlers (just the
// external entity reference handler for us). It does expect the base URI as a
// null-terminated string, with the same character type as the parsed buffers
// (char16_t in our case). Because nsIURI stores a UTF-8 string we have to do
// a conversion to UTF-16 for Expat. We also RLBox the Expat parser, so we
// also do 2 copies (into RLBox sandbox, and Expat does a copy into its pool).
// Most of the time this base URI is unused (the external entity handler is
// rarely called), but when it is we also convert it back to a nsIURI, so we
// convert the string back to UTF-8.
//
// We'd rather not do any of these conversions and copies, so we use a (hacky)
// workaround. We store all base URIs in an array of nsIURIs. Instead of
// passing the real URI to Expat as a string, we pass it a null-terminated
// 2-character buffer. The first character of that buffer stores the index of
// the corresponding nsIURI in the array (incremented with 1 because 0 is used
// to terminate a string). The entity handler can then use the index from the
// base URI that Expat passes it to look up the right nsIURI from the array.
//
// GetExpatBaseURI pushes the nsIURI to the array, and creates the
// two-character buffer for it.
//
// GetBaseURI looks up the right nsIURI in the array, based on the index from
// the two-character buffer.
using ExpatBaseURI = mozilla::Array<XML_Char, 2>;
ExpatBaseURI GetExpatBaseURI(nsIURI* aURI);
nsIURI* GetBaseURI(const XML_Char* aBase) const;
RLBoxExpatSandboxData* SandboxData() const;
rlbox_sandbox_expat* Sandbox() const;
// Destroy expat parser and return sandbox to pool
void Destroy();
mozilla::UniquePtr<mozilla::RLBoxSandboxPoolData> mSandboxPoolData;
tainted_expat<XML_Parser> mExpatParser;
nsString mLastLine;
nsString mCDataText;
// Various parts of a doctype
nsString mDoctypeName;
nsString mSystemID;
nsString mPublicID;
nsString mInternalSubset;
bool mInCData;
bool mInInternalSubset;
bool mInExternalDTD;
bool mMadeFinalCallToExpat;
// Used to track if we're in the parser.
bool mInParser;
nsresult mInternalState;
// The length of the data in Expat's buffer (in number of PRUnichars).
uint32_t mExpatBuffered;
uint16_t mTagDepth;
// These sinks all refer the same conceptual object. mOriginalSink is
// identical with the nsIContentSink* passed to WillBuildModel, and exists
// only to avoid QI-ing back to nsIContentSink*.
nsCOMPtr<nsIContentSink> mOriginalSink;
nsCOMPtr<nsIExpatSink> mSink;
const nsCatalogData* mCatalogData; // weak
nsTArray<nsCOMPtr<nsIURI>> mURIs;
// Used for error reporting.
uint64_t mInnerWindowID;
};
class RLBoxExpatSandboxData : public mozilla::RLBoxSandboxDataBase {
friend class RLBoxExpatSandboxPool;
friend class nsExpatDriver;
public:
explicit RLBoxExpatSandboxData(uint64_t aSize)
: mozilla::RLBoxSandboxDataBase(aSize) {
MOZ_COUNT_CTOR(RLBoxExpatSandboxData);
}
~RLBoxExpatSandboxData();
rlbox_sandbox_expat* Sandbox() const { return mSandbox.get(); }
// After getting a sandbox from the pool we need to register the
// Handle{Start,End}Element callbacks and associate the driver with the
// sandbox.
void AttachDriver(bool IsSystemPrincipal, void* aDriver);
void DetachDriver();
private:
mozilla::UniquePtr<rlbox_sandbox_expat> mSandbox;
// Common expat callbacks that persist across calls to {Attach,Detach}Driver,
// and consequently across sandbox reuses.
sandbox_callback_expat<XML_XmlDeclHandler> mHandleXMLDeclaration;
sandbox_callback_expat<XML_CharacterDataHandler> mHandleCharacterData;
sandbox_callback_expat<XML_ProcessingInstructionHandler>
mHandleProcessingInstruction;
sandbox_callback_expat<XML_DefaultHandler> mHandleDefault;
sandbox_callback_expat<XML_ExternalEntityRefHandler> mHandleExternalEntityRef;
sandbox_callback_expat<XML_CommentHandler> mHandleComment;
sandbox_callback_expat<XML_StartCdataSectionHandler> mHandleStartCdataSection;
sandbox_callback_expat<XML_EndCdataSectionHandler> mHandleEndCdataSection;
sandbox_callback_expat<XML_StartDoctypeDeclHandler> mHandleStartDoctypeDecl;
sandbox_callback_expat<XML_EndDoctypeDeclHandler> mHandleEndDoctypeDecl;
// Expat callbacks specific to each driver, and thus (re)set across sandbox
// reuses.
sandbox_callback_expat<XML_StartElementHandler> mHandleStartElement;
sandbox_callback_expat<XML_EndElementHandler> mHandleEndElement;
};
#endif
|