/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
* SourceText encapsulates a count of char16_t (UTF-16) or Utf8Unit (UTF-8)
* code units (note: code *units*, not bytes or code points) and those code
* units ("source units"). (Latin-1 is not supported: all places where Latin-1
* must be compiled first convert to a supported encoding.)
*
* A SourceText either observes without owning, or takes ownership of, source
* units passed to |SourceText::init|. Thus SourceText can be used to
* efficiently avoid copying.
*
* Rules for use:
*
* 1) The passed-in source units must be allocated with js_malloc(),
* js_calloc(), or js_realloc() if |SourceText::init| is instructed to take
* ownership of the source units.
* 2) If |SourceText::init| merely borrows the source units, the user must
* keep them alive until associated JS compilation is complete.
* 3) Code that calls |SourceText::take{Chars,Units}()| must keep the source
* units alive until JS compilation completes. Normally only the JS engine
* should call |SourceText::take{Chars,Units}()|.
* 4) Use the appropriate SourceText parameterization depending on the source
* units encoding.
*
* Example use:
*
*    size_t length = 512;
*    char16_t* chars = js_pod_malloc<char16_t>(length);
*    if (!chars) {
*      JS_ReportOutOfMemory(cx);
*      return false;
*    }
*    JS::SourceText<char16_t> srcBuf;
*    if (!srcBuf.init(cx, chars, length, JS::SourceOwnership::TakeOwnership)) {
*      return false;
*    }
*    JS::Rooted<JSScript*> script(cx);
*    if (!JS::Compile(cx, options, srcBuf, &script)) {
*      return false;
*    }
*/
#ifndef js_SourceText_h
#define js_SourceText_h
#include "mozilla/Assertions.h" // MOZ_ASSERT
#include "mozilla/Attributes.h" // MOZ_COLD, MOZ_IS_CLASS_INIT
#include "mozilla/Likely.h" // MOZ_UNLIKELY
#include <stddef.h> // size_t
#include <stdint.h> // UINT32_MAX
#include <type_traits> // std::conditional_t, std::is_same_v
#include "js/UniquePtr.h" // js::UniquePtr
#include "js/Utility.h" // JS::FreePolicy
namespace mozilla {
// Forward declaration of the UTF-8 code unit type; it is one of the two types
// that |JS::SourceText<Unit>| accepts as |Unit| (the other is |char16_t|).
union Utf8Unit;
} // namespace mozilla
namespace js {
// Forward declaration; this header only passes |FrontendContext| around by
// pointer, so the full definition is not needed here.
class FrontendContext;
} // namespace js
namespace JS {
// Forward declaration; only taken by reference in the
// |SourceText::initMaybeBorrowed| declarations in this header.
class JS_PUBLIC_API AutoStableStringChars;
// Alias |js::FrontendContext| into the |JS| namespace so that signatures in
// this header can spell it |JS::FrontendContext|.
using FrontendContext = js::FrontendContext;
namespace detail {
// Error reporters called by |SourceText<Unit>::initImpl| when the supplied
// unit count exceeds UINT32_MAX. There is one overload per context type that
// |SourceText::init| accepts. They are only declared here; the definitions are
// not part of this header.
MOZ_COLD extern JS_PUBLIC_API void ReportSourceTooLong(JSContext* cx);
MOZ_COLD extern JS_PUBLIC_API void ReportSourceTooLong(JS::FrontendContext* fc);
} // namespace detail
/**
 * Tells |SourceText::init| whether the SourceText should own the source units
 * it is handed.
 */
enum class SourceOwnership {
// The SourceText merely observes the units; the caller remains responsible
// for keeping them alive and for releasing them.
Borrowed,
// The SourceText owns the units and releases them with js_free() in its
// destructor, unless ownership is first taken back with
// |takeUnits()| / |takeChars()|.
TakeOwnership,
};
/**
 * Pairs a pointer to |Unit| source units with their count and a flag saying
 * whether this object owns them.  See the overview comment at the top of this
 * file for the rules of use.
 */
template <typename Unit>
class SourceText final {
 private:
  static_assert(std::is_same_v<Unit, mozilla::Utf8Unit> ||
                    std::is_same_v<Unit, char16_t>,
                "Unit must be either char16_t or Utf8Unit for "
                "SourceText<Unit>");

  /** |char16_t| or |Utf8Unit| source units of uncertain validity. */
  const Unit* units_ = nullptr;

  /** The length in code units of |units_|. */
  uint32_t length_ = 0;

  /**
   * Whether this owns |units_| or merely observes source units owned by some
   * other object.
   */
  bool ownsUnits_ = false;

 public:
  // A C++ character type that can represent the source units -- suitable for
  // passing to C++ string functions.
  using CharT =
      std::conditional_t<std::is_same_v<Unit, char16_t>, char16_t, char>;

 public:
  /**
   * Construct a SourceText.  It must be initialized using |init()| before it
   * can be used as compilation source text.
   */
  SourceText() = default;

  /**
   * Construct a SourceText from contents extracted from |other|.  This
   * SourceText will then act exactly as |other| would have acted, had it
   * not been passed to this function.  |other| will return to its default-
   * constructed state and must have |init()| called on it to use it.
   *
   * This only copies a pointer, an integer and a bool, so it cannot throw;
   * it is declared |noexcept| so that traits such as
   * |std::is_nothrow_move_constructible| report that accurately.
   */
  SourceText(SourceText&& other) noexcept
      : units_(other.units_),
        length_(other.length_),
        ownsUnits_(other.ownsUnits_) {
    // Reset |other| so that its destructor does not free the units that are
    // now referenced (and possibly owned) by |*this|.
    other.units_ = nullptr;
    other.length_ = 0;
    other.ownsUnits_ = false;
  }

  /** Frees the source units with js_free() if, and only if, they are owned. */
  ~SourceText() {
    if (ownsUnits_) {
      js_free(const_cast<Unit*>(units_));
    }
  }

 private:
  /**
   * Shared implementation of every |init()| overload.
   *
   * |context| is only used for error reporting: it is forwarded to
   * |detail::ReportSourceTooLong| when |unitsLength > UINT32_MAX|, in which
   * case this returns false.  Otherwise this returns true.
   *
   * The fields are populated even on failure (with |length_| then holding the
   * truncated cast of |unitsLength|), so that units passed with
   * |SourceOwnership::TakeOwnership| are still released by the destructor.
   *
   * NOTE(review): the previous |units_| is overwritten without being freed,
   * so calling |init()| on a SourceText that already owns units looks like it
   * would leak them -- confirm that no caller re-initializes an owning
   * instance.
   */
  template <typename ContextT>
  [[nodiscard]] MOZ_IS_CLASS_INIT bool initImpl(ContextT* context,
                                                const Unit* units,
                                                size_t unitsLength,
                                                SourceOwnership ownership) {
    MOZ_ASSERT_IF(units == nullptr, unitsLength == 0);

    // Ideally we'd use |Unit| and not cast below, but the risk of a static
    // initializer is too great.
    static const CharT emptyString[] = {'\0'};

    // Initialize all fields *before* checking length.  This ensures that
    // if |ownership == SourceOwnership::TakeOwnership|, |units| will be
    // freed when |this|'s destructor is called.
    if (units) {
      units_ = units;
      length_ = static_cast<uint32_t>(unitsLength);
      ownsUnits_ = ownership == SourceOwnership::TakeOwnership;
    } else {
      // Null input: point at a static empty buffer, which must never be
      // treated as owned (it was not heap-allocated).
      units_ = reinterpret_cast<const Unit*>(emptyString);
      length_ = 0;
      ownsUnits_ = false;
    }

    // IMPLEMENTATION DETAIL, DO NOT RELY ON: This limit is used so we can
    // store offsets in |JSScript|s as |uint32_t|.  It could be lifted
    // fairly easily if desired, as the compiler uses |size_t| internally.
    if (MOZ_UNLIKELY(unitsLength > UINT32_MAX)) {
      detail::ReportSourceTooLong(context);
      return false;
    }

    return true;
  }

 public:
  /**
   * Initialize this with source unit data: |char16_t| for UTF-16 source
   * units, or |Utf8Unit| for UTF-8 source units.
   *
   * If |ownership == TakeOwnership|, *this function* takes ownership of
   * |units|, *even if* this function fails, and you MUST NOT free |units|
   * yourself.  This single-owner-friendly approach reduces risk of leaks on
   * failure.
   *
   * |units| may be null if |unitsLength == 0|; if so, this will silently be
   * initialized using non-null, unowned units.
   *
   * Returns false (after reporting an error on |cx|) if |unitsLength| exceeds
   * UINT32_MAX, and true otherwise.
   */
  [[nodiscard]] MOZ_IS_CLASS_INIT bool init(JSContext* cx, const Unit* units,
                                            size_t unitsLength,
                                            SourceOwnership ownership) {
    return initImpl(cx, units, unitsLength, ownership);
  }

  /** As the |JSContext*| overload above, but reports failure through |fc|. */
  [[nodiscard]] MOZ_IS_CLASS_INIT bool init(JS::FrontendContext* fc,
                                            const Unit* units,
                                            size_t unitsLength,
                                            SourceOwnership ownership) {
    return initImpl(fc, units, unitsLength, ownership);
  }

  /**
   * Exactly identical to the |init()| overloads above that accept
   * |const Unit*|, but instead takes character data: |const CharT*|.
   *
   * (We can't just write this to accept |const CharT*|, because then in the
   * UTF-16 case this overload and the one above would be identical.  So we
   * use SFINAE to expose the |CharT| overload only if it's different.)
   */
  template <typename Char,
            typename = std::enable_if_t<std::is_same_v<Char, CharT> &&
                                        !std::is_same_v<Char, Unit>>>
  [[nodiscard]] MOZ_IS_CLASS_INIT bool init(JSContext* cx, const Char* chars,
                                            size_t charsLength,
                                            SourceOwnership ownership) {
    return initImpl(cx, reinterpret_cast<const Unit*>(chars), charsLength,
                    ownership);
  }

  /** As the |JSContext*| overload above, but reports failure through |fc|. */
  template <typename Char,
            typename = std::enable_if_t<std::is_same_v<Char, CharT> &&
                                        !std::is_same_v<Char, Unit>>>
  [[nodiscard]] MOZ_IS_CLASS_INIT bool init(JS::FrontendContext* fc,
                                            const Char* chars,
                                            size_t charsLength,
                                            SourceOwnership ownership) {
    return initImpl(fc, reinterpret_cast<const Unit*>(chars), charsLength,
                    ownership);
  }

  /**
   * Initialize this using source units transferred out of |data|.
   *
   * Ownership always passes to this SourceText, including when this function
   * fails because |dataLength| exceeds UINT32_MAX.
   */
  [[nodiscard]] bool init(JSContext* cx,
                          js::UniquePtr<Unit[], JS::FreePolicy> data,
                          size_t dataLength) {
    return initImpl(cx, data.release(), dataLength,
                    SourceOwnership::TakeOwnership);
  }

  /** As the |JSContext*| overload above, but reports failure through |fc|. */
  [[nodiscard]] bool init(JS::FrontendContext* fc,
                          js::UniquePtr<Unit[], JS::FreePolicy> data,
                          size_t dataLength) {
    return initImpl(fc, data.release(), dataLength,
                    SourceOwnership::TakeOwnership);
  }

  /**
   * Exactly identical to the |init()| overloads above that accept
   * |UniquePtr<Unit[], JS::FreePolicy>|, but instead takes character data:
   * |UniquePtr<CharT[], JS::FreePolicy>|.
   *
   * (We can't just duplicate the signature above with s/Unit/CharT/, because
   * then in the UTF-16 case this overload and the one above would be identical.
   * So we use SFINAE to expose the |CharT| overload only if it's different.)
   */
  template <typename Char,
            typename = std::enable_if_t<std::is_same_v<Char, CharT> &&
                                        !std::is_same_v<Char, Unit>>>
  [[nodiscard]] bool init(JSContext* cx,
                          js::UniquePtr<Char[], JS::FreePolicy> data,
                          size_t dataLength) {
    return init(cx, data.release(), dataLength, SourceOwnership::TakeOwnership);
  }

  /** As the |JSContext*| overload above, but reports failure through |fc|. */
  template <typename Char,
            typename = std::enable_if_t<std::is_same_v<Char, CharT> &&
                                        !std::is_same_v<Char, Unit>>>
  [[nodiscard]] bool init(JS::FrontendContext* fc,
                          js::UniquePtr<Char[], JS::FreePolicy> data,
                          size_t dataLength) {
    return init(fc, data.release(), dataLength, SourceOwnership::TakeOwnership);
  }

  /**
   * Initialize this using an AutoStableStringChars.  Transfers the code units
   * if they are owned by the AutoStableStringChars, otherwise borrow directly
   * from the underlying JSString.  The AutoStableStringChars must outlive this
   * SourceText and must be explicitly configured to the same unit type as this
   * SourceText.
   *
   * (Declared only; the definitions are not in this header.)
   */
  [[nodiscard]] bool initMaybeBorrowed(JSContext* cx,
                                       AutoStableStringChars& linearChars);
  [[nodiscard]] bool initMaybeBorrowed(JS::FrontendContext* fc,
                                       AutoStableStringChars& linearChars);

  /**
   * Access the encapsulated data using a code unit type.
   *
   * This function is useful for code that wants to interact with source text
   * as *code units*, not as string data.  This doesn't matter for UTF-16,
   * but it's a crucial distinction for UTF-8.  When UTF-8 source text is
   * encapsulated, |Unit| being |mozilla::Utf8Unit| unambiguously indicates
   * that the code units are UTF-8.  In contrast |const char*| returned by
   * |get()| below could hold UTF-8 (or its ASCII subset) or Latin-1 or (in
   * particularly cursed embeddings) EBCDIC or some other legacy character
   * set.  Prefer this function to |get()| wherever possible.
   */
  const Unit* units() const { return units_; }

  /**
   * Access the encapsulated data using a character type.
   *
   * This function is useful for interactions with character-centric actions
   * like interacting with UniqueChars/UniqueTwoByteChars or printing out
   * text in a debugger, that only work with |CharT|.  But as |CharT| loses
   * encoding specificity when UTF-8 source text is encapsulated, prefer
   * |units()| to this function.
   */
  const CharT* get() const { return reinterpret_cast<const CharT*>(units_); }

  /**
   * Returns true if this owns the source units and will free them on
   * destruction.  If true, it is legal to call |take{Chars,Units}()|.
   */
  bool ownsUnits() const { return ownsUnits_; }

  /**
   * Count of the underlying source units -- code units, not bytes or code
   * points -- in this.
   */
  uint32_t length() const { return length_; }

  /**
   * Retrieve and take ownership of the underlying source units.  The caller
   * is now responsible for calling js_free() on the returned value, *but
   * only after JS script compilation has completed*.
   *
   * After underlying source units have been taken, this will continue to
   * refer to the same data -- it just won't own the data.  get() and
   * length() will return the same values, but ownsUnits() will be false.
   * The taken source units must be kept alive until after JS script
   * compilation completes, as noted above, for this to be safe.
   *
   * The caller must check ownsUnits() before calling takeUnits().  Taking
   * and then free'ing an unowned buffer will have dire consequences.
   */
  Unit* takeUnits() {
    MOZ_ASSERT(ownsUnits_);
    ownsUnits_ = false;
    return const_cast<Unit*>(units_);
  }

  /**
   * Akin to |takeUnits()| in all respects, but returns characters rather
   * than units.
   */
  CharT* takeChars() { return reinterpret_cast<CharT*>(takeUnits()); }

 private:
  // Copying is disabled: two copies could both believe they own |units_| and
  // free it twice.
  SourceText(const SourceText&) = delete;
  SourceText& operator=(const SourceText&) = delete;
};
} // namespace JS
#endif /* js_SourceText_h */