1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383
|
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
//
//===----------------------------------------------------------------------===//
/// A single instruction for the matching engine to execute
///
/// Instructions are 64-bits, consisting of an 8-bit opcode
/// and a 56-bit payload, which packs operands.
///
struct Instruction: RawRepresentable, Hashable {
var rawValue: UInt64
init(rawValue: UInt64){
self.rawValue = rawValue
}
}
extension Instruction {
enum OpCode: UInt64 {
case invalid = 0
// MARK: - General Purpose
/// Move an immediate value into a register
///
/// moveImmediate(_ i: Int, into: IntReg)
///
/// Operands:
/// - Immediate value to move
/// - Int register to move into
///
case moveImmediate
/// Move the current position into a register
///
/// moveCurrentPosition(into: PositionRegister)
///
/// Operands:
/// - Position register to move into
case moveCurrentPosition
/// Set the current position to the value stored in the register
///
/// restorePosition(from: PositionRegister)
///
/// Operands:
/// - Position register to read from
case restorePosition
// MARK: General Purpose: Control flow
/// Branch to a new instruction
///
/// branch(to: InstAddr)
///
/// Operand: instruction address to branch to
case branch
/// Conditionally branch if zero, otherwise decrement
///
/// condBranch(
/// to: InstAddr, ifZeroElseDecrement: IntReg)
///
/// Operands:
/// - Instruction address to branch to, if zero
/// - Int register to check for zero, otherwise decrease
///
case condBranchZeroElseDecrement
/// Conditionally branch if the current position is the same as the register
///
/// condBranch(
/// to: InstAddr, ifSamePositionAs: PositionRegister)
///
/// Operands:
/// - Instruction address to branch to, if the position in the register is the same as currentPosition
/// - Position register to check against
case condBranchSamePosition
// TODO: Function calls
// MARK: - Matching
/// Advance the input position.
///
/// advance(_ amount: Distance)
///
/// Operand: Amount to advance by.
case advance
// TODO: Is the amount useful here? Is it commonly more than 1?
/// Composite assert-advance else restore.
///
/// match(_: EltReg, isCaseInsensitive: Bool)
///
/// Operands:
/// - Element register to compare against.
/// - Boolean for if we should match in a case insensitive way
case match
/// Match against a scalar and possibly perform a boundary check or match in a case insensitive way
///
/// matchScalar(_: Unicode.Scalar, isCaseInsensitive: Bool, boundaryCheck: Bool)
///
/// Operands: Scalar value to match against and booleans
case matchScalar
/// Match a character or a scalar against a set of valid ascii values stored in a bitset
///
/// matchBitset(_: AsciiBitsetRegister, isScalar: Bool)
///
/// Operand:
/// - Ascii bitset register containing the bitset
/// - Boolean for if we should match by scalar value
case matchBitset
/// Match against a built-in character class
///
/// matchBuiltin(_: CharacterClassPayload)
///
/// Operand: the payload contains
/// - The character class
/// - If it is inverted
/// - If it strictly matches only ascii values
case matchBuiltin
/// Matches any non newline character
/// Operand: If we are in scalar mode or not
case matchAnyNonNewline
// MARK: Extension points
/// Advance the input position based on the result by calling the consume
/// function.
///
/// Operand: Consume function register to call.
case consumeBy
/// Lookaround assertion operation. Performs a zero width assertion based on
/// the assertion type and options stored in the payload
///
/// assert(_:AssertionPayload)
///
/// Operands: AssertionPayload containing assertion type and options
case assertBy
/// Custom value-creating consume operation.
///
/// match(
/// _ matchFunction: (
/// input: Input,
/// bounds: Range<Position>
/// ) -> (Position, Any),
/// into: ValueReg
/// )
///
///
case matchBy
// MARK: Matching: Save points
/// Add a save point
///
/// Operand: instruction address to resume from
///
/// A save point is:
/// - a position in the input to restore
/// - a position in the call stack to cut off
/// - an instruction address to resume from
///
/// TODO: Consider if separating would improve generality
case save
///
/// Add a save point that doesn't preserve input position
///
/// NOTE: This is a prototype for now, but exposes
/// flaws in our formulation of back tracking. We could
/// instead have an instruction to update the top
/// most saved position instead
case saveAddress
/// Remove the most recently saved point
///
/// Precondition: There is a save point to remove
case clear
/// Remove save points up to and including the operand
///
/// Operand: instruction address to look for
///
/// Precondition: The operand is in the save point list
case clearThrough
/// Fused save-and-branch.
///
/// split(to: target, saving: backtrackPoint)
///
case splitSaving
/// Fused quantify, execute, save instruction
/// Quantifies the stored instruction in an inner loop instead of looping through instructions in processor
/// Only quantifies specific nodes
///
/// quantify(_:QuantifyPayload)
///
case quantify
/// Begin the given capture
///
/// beginCapture(_:CapReg)
///
case beginCapture
/// End the given capture
///
/// endCapture(_:CapReg)
///
case endCapture
/// Transform a captured value, saving the built value
///
/// transformCapture(_:CapReg, _:TransformReg)
///
case transformCapture
/// Save a value into a capture register
///
/// captureValue(_: ValReg, into _: CapReg)
case captureValue
/// Match a previously captured value
///
/// backreference(_:CapReg)
///
case backreference
// MARK: Matching: State transitions
// TODO: State transitions need more work. We want
// granular core but also composite ones that will
// interact with save points
/// Transition into ACCEPT and halt
case accept
/// Signal failure (currently same as `restore`)
case fail
// TODO: Fused assertions. It seems like we often want to
// branch based on assertion fail or success.
}
}
internal var _opcodeMask: UInt64 { 0xFF00_0000_0000_0000 }
var _payloadMask: UInt64 { ~_opcodeMask }
extension Instruction {
var opcodeMask: UInt64 { 0xFF00_0000_0000_0000 }
var opcode: OpCode {
get {
OpCode(
rawValue: (rawValue & _opcodeMask) &>> 56
).unsafelyUnwrapped
}
set {
assert(newValue != .invalid, "consider hoisting this")
assert(newValue.rawValue < 256)
self.rawValue &= ~_opcodeMask
self.rawValue |= newValue.rawValue &<< 56
}
}
var payload: Payload {
get { Payload(rawValue: rawValue & ~opcodeMask) }
set {
self.rawValue &= opcodeMask
self.rawValue |= newValue.rawValue
}
}
var destructure: (opcode: OpCode, payload: Payload) {
get { (opcode, payload) }
set { self = Self(opcode, payload) }
}
init(_ opcode: OpCode, _ payload: Payload/* = Payload()*/) {
self.init(rawValue: 0)
self.opcode = opcode
self.payload = payload
// TODO: check invariants
}
init(_ opcode: OpCode) {
self.init(rawValue: 0)
self.opcode = opcode
//self.payload = payload
// TODO: check invariants
// TODO: placeholder bit pattern for fill-in-later
}
}
/*
This is in need of more refactoring and design, the following
are a rough listing of TODOs:
- Save point and call stack interactions should be more formalized.
- It's too easy to have unbalanced save/clears amongst function calls
- Nominal type for conditions with an invert bit
- Better bit allocation and layout for operand, instruction, etc
- Use spare bits for better assertions
- Check low-level compiler code gen for switches
- Consider relative addresses instead of absolute addresses
- Explore a predication bit
- Explore using SIMD
- Explore a larger opcode, so that we can have variant flags
- E.g., opcode-local bits instead of flattening opcode space
We'd like to eventually design:
- A general-purpose core (future extensibility)
- A matching-specific instruction area carved out
- Leave a large area for future usage of run-time bytecode interpretation
- Debate: allow for future variable-width instructions
We'd like a testing / performance setup that lets us
- Define new instructions in terms of old ones (testing, perf)
- Version our instruction set in case we need future fixes
*/
// TODO: replace with instruction formatters...
extension Instruction {
var instructionAddress: InstructionAddress? {
switch opcode {
case .branch, .save, .saveAddress:
return payload.addr
default: return nil
}
}
var elementRegister: ElementRegister? {
switch opcode {
case .match:
return payload.elementPayload.1
default: return nil
}
}
var consumeFunctionRegister: ConsumeFunctionRegister? {
switch opcode {
case .consumeBy: return payload.consumer
default: return nil
}
}
}
extension Instruction: InstructionProtocol {
var operandPC: InstructionAddress? { instructionAddress }
}
// TODO: better names for accept/fail/etc. Instruction
// conflates backtracking with signaling failure or success,
// could be clearer.
enum State {
/// Still running
case inProgress
/// FAIL: halt and signal failure
case fail
/// ACCEPT: halt and signal success
case accept
}
|