1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
|
//===--- TypeCheckRegex.cpp - Regex type checking utilities ---------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
#include "swift/AST/ASTContext.h"
#include "swift/AST/Decl.h"
#include "swift/AST/DiagnosticsSema.h"
#include "swift/AST/Expr.h"
#include "swift/AST/Type.h"
#include "swift/AST/TypeCheckRequests.h"
#include "swift/AST/Types.h"
#include "swift/Basic/Assertions.h"
#include "swift/Basic/Defer.h"
#include "swift/Bridging/ASTGen.h"
using namespace swift;
/// Integer type used to store the version at the start of a serialized
/// capture structure.
using CaptureStructureSerializationVersion = uint16_t;

/// Returns the number of bytes to allocate for the capture-structure
/// serialization of a regex whose text is \p regexLength bytes long: the
/// version header, one byte per byte of regex text, and one extra byte.
static unsigned
getCaptureStructureSerializationAllocationSize(unsigned regexLength) {
  constexpr unsigned versionBytes =
      sizeof(CaptureStructureSerializationVersion);
  // NOTE(review): the extra byte presumably leaves room for the trailing
  // `.end` code -- confirm against the serializer.
  constexpr unsigned extraByte = 1;
  return versionBytes + regexLength + extraByte;
}
/// One-byte codes that make up the body of a serialized capture structure, as
/// consumed by decodeRegexCaptureTypes. The numeric values are part of the
/// serialization format, so they are spelled out explicitly.
enum class RegexCaptureStructureCode: uint8_t {
// Terminates the serialization; decoding stops when this code is read.
End = 0,
// An unnamed capture of the atom type.
Atom = 1,
// A named capture of the atom type; the code is followed by the name and a
// NUL terminator.
NamedAtom = 2,
// Wraps the most recently decoded element in an array type.
FormArray = 3,
// Wraps the most recently decoded element in an optional type.
FormOptional = 4,
// Opens a nested tuple scope.
BeginTuple = 5,
// Closes the innermost tuple scope and appends the resulting tuple type to
// the enclosing scope.
EndTuple = 6,
// Not a real code: the number of codes above. Raw values greater than or
// equal to this are rejected by the decoder.
CaseCount
};
/// Decodes regex capture types from the given serialization and appends the
/// decoded capture types to @p result.
///
/// @param ctx           AST context used to intern capture names and to build
///                      tuple types.
/// @param serialization The bytes to decode: a version followed by a sequence
///                      of RegexCaptureStructureCode values (named atoms are
///                      followed by a NUL-terminated name).
/// @param atomType      The type used for every `.atom` / `.namedAtom`.
/// @param result        Receives the decoded top-level elements. It is only
///                      appended to when decoding succeeds.
/// @returns true if the serialization is malformed (too short, wrong version,
///          unknown code, unterminated name, a `.formArray`/`.formOptional`
///          with nothing to wrap, or unbalanced tuples); false on success.
static bool decodeRegexCaptureTypes(ASTContext &ctx,
                                    ArrayRef<uint8_t> serialization,
                                    Type atomType,
                                    SmallVectorImpl<TupleTypeElt> &result) {
  // Encoding rules:
  // encode(〚`T`〛) ==> <version>, 〚`T`〛, .end
  // 〚`T` (atom)〛 ==> .atom
  // 〚`name: T` (atom)〛 ==> .atom, `name`, '\0'
  // 〚`[T]`〛 ==> 〚`T`〛, .formArray
  // 〚`T?`〛 ==> 〚`T`〛, .formOptional
  // 〚`(T0, T1, ...)` (top level)〛 ==> 〚`T0`〛, 〚`T1`〛, ...
  // 〚`(T0, T1, ...)`〛 ==> .beginTuple, 〚`T0`〛, 〚`T1`〛, ..., .endTuple
  //
  // For details, see apple/swift-experimental-string-processing.
  using Version = CaptureStructureSerializationVersion;
  static const Version implVersion = 1;
  unsigned size = serialization.size();
  // A serialization should store a version and `.end` at the very least, so
  // anything shorter is malformed.
  unsigned minSize = sizeof(Version) + sizeof(RegexCaptureStructureCode);
  if (size < minSize)
    return true;
  // Read version.
  Version version = *reinterpret_cast<const Version *>(serialization.data());
  if (version != implVersion)
    return true;
  // Read contents. Each entry of `scopes` collects the elements of one tuple
  // nesting level; the first entry is the top level.
  SmallVector<SmallVector<TupleTypeElt, 4>, 4> scopes(1);
  unsigned offset = sizeof(Version);
  // Reads the code at `offset` and advances past it. Returns nullopt (without
  // advancing) if the byte is not a known code.
  auto consumeCode = [&]() -> std::optional<RegexCaptureStructureCode> {
    auto rawValue = serialization[offset];
    if (rawValue >= static_cast<uint8_t>(RegexCaptureStructureCode::CaseCount))
      return std::nullopt;
    offset += sizeof(RegexCaptureStructureCode);
    return static_cast<RegexCaptureStructureCode>(rawValue);
  };
  do {
    auto code = consumeCode();
    if (!code)
      return true; // Unknown code.
    switch (*code) {
    case RegexCaptureStructureCode::End:
      // Stop decoding; any bytes after `.end` are ignored.
      offset = size;
      break;
    case RegexCaptureStructureCode::Atom:
      scopes.back().push_back(atomType);
      break;
    case RegexCaptureStructureCode::NamedAtom: {
      auto *namePtr = reinterpret_cast<const char *>(
          serialization.slice(offset).data());
      auto length = strnlen(namePtr, size - offset);
      if (length >= size - offset)
        return true; // Unterminated string.
      StringRef name(namePtr, length);
      scopes.back().push_back(
          TupleTypeElt(atomType, ctx.getIdentifier(name)));
      offset += length + /*NUL*/ 1;
      break;
    }
    case RegexCaptureStructureCode::FormArray: {
      if (scopes.back().empty())
        return true; // No preceding element to wrap.
      auto &element = scopes.back().back();
      element = TupleTypeElt(ArraySliceType::get(element.getType()),
                             element.getName());
      break;
    }
    case RegexCaptureStructureCode::FormOptional: {
      if (scopes.back().empty())
        return true; // No preceding element to wrap.
      auto &element = scopes.back().back();
      element = TupleTypeElt(OptionalType::get(element.getType()),
                             element.getName());
      break;
    }
    case RegexCaptureStructureCode::BeginTuple:
      scopes.push_back({});
      break;
    case RegexCaptureStructureCode::EndTuple: {
      if (scopes.size() < 2)
        return true; // `.endTuple` without a matching `.beginTuple`.
      auto children = scopes.pop_back_val();
      assert(children.size() > 1);
      auto type = TupleType::get(children, ctx);
      scopes.back().push_back(Type(type));
      break;
    }
    case RegexCaptureStructureCode::CaseCount:
      llvm_unreachable("Handled earlier");
    }
  } while (offset < size);
  if (scopes.size() != 1)
    return true; // Unterminated tuple.
  auto &elements = scopes.back();
  result.append(elements.begin(), elements.end());
  return false;
}
/// Builds the type of \p regex as the `Regex` declaration bound to a single
/// generic argument decoded from \p serializedCaptures: the sole decoded
/// element's type if there is exactly one, otherwise a tuple of all decoded
/// elements.
///
/// Emits a diagnostic at the literal's location and returns a null Type if
/// the `Regex` declaration cannot be found or the captures fail to decode.
static Type computeRegexLiteralType(const RegexLiteralExpr *regex,
                                    ArrayRef<uint8_t> serializedCaptures) {
  auto &ctx = regex->getASTContext();

  auto *regexDecl = ctx.getRegexDecl();
  if (!regexDecl) {
    ctx.Diags.diagnose(regex->getLoc(), diag::string_processing_lib_missing,
                       ctx.Id_Regex.str());
    return Type();
  }

  // Every atom in the serialization is decoded as the substring type.
  SmallVector<TupleTypeElt, 4> decodedElements;
  const bool malformed = decodeRegexCaptureTypes(
      ctx, serializedCaptures, /*atomType*/ ctx.getSubstringType(),
      decodedElements);
  if (malformed) {
    ctx.Diags.diagnose(regex->getLoc(),
                       diag::regex_capture_types_failed_to_decode);
    return Type();
  }
  assert(!decodedElements.empty() && "Should have decoded at least an atom");

  // A lone element is used directly; several elements are wrapped in a tuple.
  Type outputType = decodedElements.size() == 1
                        ? decodedElements.front().getType()
                        : Type(TupleType::get(decodedElements, ctx));
  return BoundGenericStructType::get(regexDecl, Type(), outputType);
}
/// Parses the text of \p regex through ASTGen and packages the result: the
/// regex text to emit, the literal's type, the version reported by the
/// parser, and the pattern features it found. If parsing reports an error,
/// the result carries a null type, version 0 and no features.
RegexLiteralPatternInfo
RegexLiteralPatternInfoRequest::evaluate(Evaluator &eval,
                                         const RegexLiteralExpr *regex) const {
#if SWIFT_BUILD_REGEX_PARSER_IN_COMPILER
  auto &astCtx = regex->getASTContext();
  auto patternText = regex->getParsedRegexText();

  // Outputs filled in by the Swift-side parser: the version and a buffer for
  // the serialized capture structure, sized from the regex text length.
  size_t patternVersion = 0;
  std::vector<uint8_t> captureBytes(
      getCaptureStructureSerializationAllocationSize(patternText.size()));

  // The bridged feature list is released when this function returns, on
  // every path.
  BridgedRegexLiteralPatternFeatures bridgedFeatures;
  SWIFT_DEFER {
    swift_ASTGen_freeBridgedRegexLiteralPatternFeatures(bridgedFeatures);
  };

  // Let the Swift library parse the contents; it reports whether an error
  // occurred.
  const bool parseFailed = swift_ASTGen_parseRegexLiteral(
      patternText,
      /*versionOut=*/&patternVersion,
      /*captureStructureOut=*/captureBytes.data(),
      /*captureStructureSize=*/captureBytes.size(),
      /*patternFeaturesOut=*/&bridgedFeatures,
      /*diagBaseLoc=*/regex->getLoc(), &astCtx.Diags);
  if (parseFailed)
    return {patternText, Type(), /*version*/ 0, /*features*/ {}};

  // Convert each bridged feature back to its C++ representation.
  SmallVector<RegexLiteralPatternFeature> patternFeatures;
  for (auto &bridged : bridgedFeatures.unbridged())
    patternFeatures.push_back(bridged.unbridged());

  assert(patternVersion >= 1);
  auto literalType = computeRegexLiteralType(regex, captureBytes);

  // FIXME: We need to plumb through the 'regexToEmit' result to the caller.
  // For now, it is the same as the input.
  return {/*regexToEmit*/ patternText, literalType, patternVersion,
          astCtx.AllocateCopy(patternFeatures)};
#else
  llvm_unreachable("Shouldn't have parsed a RegexLiteralExpr");
#endif
}
/// Asks ASTGen for the description of the regex pattern feature \p kind and
/// returns it.
StringRef RegexLiteralFeatureDescriptionRequest::evaluate(
    Evaluator &evaluator, RegexLiteralPatternFeatureKind kind,
    ASTContext *ctx) const {
#if SWIFT_BUILD_REGEX_PARSER_IN_COMPILER
  // The description is allocated in the ASTContext, so the unbridged
  // StringRef can be handed back to the caller as-is.
  BridgedStringRef description;
  swift_ASTGen_getDescriptionForRegexPatternFeature(kind, *ctx, &description);
  return description.unbridged();
#else
  llvm_unreachable("Shouldn't have parsed a RegexLiteralExpr");
#endif
}
/// Asks ASTGen for the Swift version associated with the regex pattern
/// feature \p kind and maps its major/minor components to an availability
/// range via the ASTContext.
AvailabilityRange RegexLiteralFeatureAvailabilityRequest::evaluate(
    Evaluator &evaluator, RegexLiteralPatternFeatureKind kind,
    ASTContext *ctx) const {
#if SWIFT_BUILD_REGEX_PARSER_IN_COMPILER
  BridgedSwiftVersion swiftVersion;
  swift_ASTGen_getSwiftVersionForRegexPatternFeature(kind, &swiftVersion);
  return ctx->getSwiftAvailability(swiftVersion.getMajor(),
                                   swiftVersion.getMinor());
#else
  llvm_unreachable("Shouldn't have parsed a RegexLiteralExpr");
#endif
}
|