1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314
|
require 'mustermann'
require 'mustermann/pattern_cache'
require 'delegate'
module Mustermann
# Class inspired by Ruby's StringScanner to scan an input string using multiple patterns.
#
# @example
# require 'mustermann/string_scanner'
# scanner = Mustermann::StringScanner.new("here is our example string")
#
# scanner.scan("here") # => "here"
# scanner.getch # => " "
#
# if scanner.scan(":verb our")
# scanner.scan(:noun, capture: :word)
# scanner[:verb] # => "is"
# scanner[:noun] # => "example"
# end
#
# scanner.rest # => "string"
#
# @note
# This structure is not thread-safe, you should not scan on the same StringScanner instance concurrently.
# Even if it was thread-safe, scanning concurrently would probably lead to unwanted behaviour.
class StringScanner
# Exception raised if scan/unscan operation cannot be performed.
# It subclasses the top-level ::ScanError, so `rescue ::ScanError` also catches it.
# NOTE(review): ::ScanError is presumably the constant provided by the stdlib strscan
# extension; this file does not require it directly -- confirm it is loaded via 'mustermann'.
ScanError = Class.new(::ScanError)
# Pattern cache shared by all scanner instances; patterns are built through it in #create_pattern.
PATTERN_CACHE = PatternCache.new
#private_constant :PATTERN_CACHE
# Patterns created by {#scan} will be globally cached, since we assume that there is a finite number
# of different patterns used and that they are more likely to be reused than not.
# This method allows clearing the cache.
#
# The cache lives in the class-level PATTERN_CACHE constant, so clearing it affects every scanner.
#
# @see Mustermann::PatternCache
def self.clear_cache
PATTERN_CACHE.clear
end
# Reports the size of the shared PATTERN_CACHE.
#
# @return [Integer] number of cached patterns
# @see clear_cache
# @api private
def self.cache_size
PATTERN_CACHE.size
end
# Value returned by {StringScanner#scan}, {StringScanner#check}, and friends.
# It delegates to the matched substring, so it can be used like a String, and it also
# records where that substring sits in the main string and which params were parsed from it.
class ScanResult < DelegateClass(String)
  # The scanner this result came from.
  # @example
  #   require 'mustermann/string_scanner'
  #   scanner = Mustermann::StringScanner.new('foo/bar')
  #   scanner.scan(:name).scanner == scanner # => true
  attr_reader :scanner

  # @example
  #   require 'mustermann/string_scanner'
  #   scanner = Mustermann::StringScanner.new('foo/bar')
  #   scanner.scan(:name).position # => 0
  #   scanner.getch.position       # => 3
  #   scanner.scan(:name).position # => 4
  #
  # @return [Integer] position the substring starts at
  attr_reader :position
  alias pos position

  # Reports the length recorded at construction time; being defined here, it takes
  # precedence over delegating +length+ to the substring.
  #
  # @example
  #   require 'mustermann/string_scanner'
  #   scanner = Mustermann::StringScanner.new('foo/bar')
  #   scanner.scan(:name).length # => 3
  #   scanner.getch.length       # => 1
  #   scanner.scan(:name).length # => 3
  #
  # @return [Integer] length of the substring
  attr_reader :length

  # Params parsed from the substring.
  # Will not include params from previous scan results.
  #
  # @example
  #   require 'mustermann/string_scanner'
  #   scanner = Mustermann::StringScanner.new('foo/bar')
  #   scanner.scan(:name).params # => { "name" => "foo" }
  #   scanner.getch.params       # => {}
  #   scanner.scan(:name).params # => { "name" => "bar" }
  #
  # @see Mustermann::StringScanner#params
  # @see Mustermann::StringScanner#[]
  #
  # @return [Hash] params parsed from the substring
  attr_reader :params

  # @param scanner the scanner that produced this result (must respond to +to_s+)
  # @param [Integer] position offset of the substring in the scanner's string
  # @param [Integer] length number of characters covered by the substring
  # @param [Hash] params params parsed from the substring
  # @api private
  def initialize(scanner, position, length, params = {})
    @scanner  = scanner
    @position = position
    @length   = length
    @params   = params
  end

  # Delegation target: the substring, sliced from the scanner's string on first use
  # and memoized afterwards.
  #
  # @api private
  # @!visibility private
  def __getobj__
    @__getobj__ ||= begin
      input = scanner.to_s
      input[position, length]
    end
  end
end
# @return [Hash] default pattern options used for {#scan} and similar methods
# @see #initialize
attr_reader :pattern_options

# Params accumulated from all previous {#scan} and {#scan_until} matches;
# {#check} and {#check_until} never contribute to them. The latest contribution
# can be reverted with {#unscan}, and {#reset} clears them completely.
#
# @return [Hash] current params
attr_reader :params

# Assigning a position directly is not recorded in the scan history, so a later
# {#unscan} recomputes the position from the recorded matches only.
#
# @return [Integer] current scan position on the input string
attr_accessor :position
alias pos position
alias pos= position=
# Creates a scanner for the given input string.
#
# The string may be omitted and only options passed; in that case the scanner
# starts out with an empty input string.
#
# @example with different default type
#   require 'mustermann/string_scanner'
#   scanner = Mustermann::StringScanner.new("foo/bar/baz", type: :shell)
#   scanner.scan('*')    # => "foo"
#   scanner.scan('**/*') # => "/bar/baz"
#
# @example with options only
#   require 'mustermann/string_scanner'
#   scanner = Mustermann::StringScanner.new(type: :shell)
#   scanner.to_s # => ""
#
# @param [String] string the string to scan
# @param [Hash] pattern_options default options used for {#scan}
def initialize(string = "", pattern_options = {})
  # A Hash in first position means the string was left out and only options were given.
  # Fall back to an empty input: String({}) would otherwise yield the text "{}".
  pattern_options, string = string, "" if string.kind_of?(Hash)
  @pattern_options = pattern_options
  # Work on a private copy so appending via #<< never mutates the caller's string.
  @string = String(string).dup
  reset
end
# Rewinds the scanner: {#position} goes back to the start, all {#params} are dropped
# and the history used by {#unscan} is emptied.
# @return [Mustermann::StringScanner] the scanner itself
def reset
  @position, @params, @history = 0, {}, []
  self
end
# Moves the position to the end of the input string.
# The jump is recorded like any other match, so it can be reverted with {#unscan}.
# @return [Mustermann::StringScanner] the scanner itself
def terminate
  remaining = size - @position
  track_result ScanResult.new(self, @position, remaining)
  self
end
# Tries to match the given pattern against a substring starting exactly at the current position.
#
# On a match, the current {#position} advances to the end of that substring and any params
# parsed from it are merged into {#params}.
#
# @param (see Mustermann.new)
# @return [Mustermann::StringScanner::ScanResult, nil] the matched substring, nil if it didn't match
def scan(pattern, options = {})
  match = check(pattern, options)
  track_result(match)
end
# Looks for the first substring matching the given pattern, starting the search at the
# current position and moving forward through the rest of the input.
#
# On a match, the current {#position} advances past both the skipped prefix and the matched
# substring, and any params parsed from the match are merged into {#params}.
#
# @param (see Mustermann.new)
# @return [Mustermann::StringScanner::ScanResult, nil] the matched substring, nil if it didn't match
def scan_until(pattern, options = {})
  match, skipped = check_until_with_prefix(pattern, options)
  # The skipped prefix is tracked first so the position ends up behind the match.
  track_result(skipped, match)
end
# Reverts the last operation that advanced the position.
#
# Operations advancing the position: {#terminate}, {#scan}, {#scan_until}, {#getch}.
#
# Reverting is done by resetting the scanner and replaying every recorded operation
# except the most recent one, which rebuilds both {#position} and {#params}.
#
# @raise [ScanError] if there is no recorded operation to revert
# @return [Mustermann::StringScanner] the scanner itself
def unscan
  if @history.empty?
    raise ScanError, 'unscan failed: previous match record not exist'
  end

  # Grab the steps to keep before #reset wipes the history.
  *replayed, _undone = @history
  reset
  replayed.each { |step| track_result(*step) }
  self
end
# Tries to match the given pattern against a substring starting exactly at the current position.
#
# Neither {#position} nor {#params} are changed by this method.
#
# @param (see Mustermann.new)
# @return [Mustermann::StringScanner::ScanResult, nil] the matched substring, nil if it didn't match
def check(pattern, options = {})
  matcher = create_pattern(pattern, options)
  params, length = matcher.peek_params(rest)
  return unless params

  ScanResult.new(self, @position, length, params)
end
# Looks for the first substring matching the given pattern, starting the search at the
# current position and moving forward through the rest of the input.
#
# Neither {#position} nor {#params} are changed by this method.
#
# @param (see Mustermann.new)
# @return [Mustermann::StringScanner::ScanResult, nil] the matched substring, nil if it didn't match
def check_until(pattern, options = {})
  match, _prefix = check_until_with_prefix(pattern, options)
  match
end
# Searches forward from the current position for the first place where the pattern matches.
#
# The position is moved temporarily while probing and is always restored afterwards,
# even if a check raises.
#
# @param (see Mustermann.new)
# @return [Array] two elements: the match and a ScanResult covering the characters skipped
#   before it; both are nil when nothing matched before the end of the input
def check_until_with_prefix(pattern, options = {})
  start = @position
  result = nil
  until eos?
    result = check(pattern, options)
    break if result
    @position += 1
  end
  prefix = ScanResult.new(self, start, @position - start) if result
  [result, prefix]
ensure
  @position = start
end
# Consumes exactly one character, advancing the {#position} by one.
# @return [Mustermann::StringScanner::ScanResult, nil] the character, nil if at end of string
def getch
  return if eos?

  track_result ScanResult.new(self, @position, 1)
end
# Appends the given string to the string being scanned.
# The scanner's internal buffer is modified in place; {#position} and {#params} are left as they are.
#
# @example
# require 'mustermann/string_scanner'
# scanner = Mustermann::StringScanner.new
# scanner << "foo"
# scanner.scan(/.+/) # => "foo"
#
# @param [String] string will be appended
# @return [Mustermann::StringScanner] the scanner itself
def <<(string)
@string << string
self
end
# True once the position is at or beyond the last character of the input.
# @return [true, false] whether or not the end of the string has been reached
def eos?
  @string.size <= @position
end
# A position is at the start of a line if it is the very first position or if the
# character right before it is a newline.
# @return [true, false] whether or not the current position is at the start of a line
def beginning_of_line?
  return true if @position.zero?

  @string[@position - 1] == "\n"
end
# Everything from the current position to the end of the input.
# @return [String] outstanding string not yet matched, empty string at end of input string
def rest
  remainder = @string[@position..-1]
  # The slice is nil when the position lies beyond the end of the input.
  remainder.nil? ? "" : remainder
end
# Never negative: a position beyond the end of the input counts as nothing remaining.
# @return [Integer] number of characters remaining to be scanned
def rest_size
  return 0 if @position > size

  size - @position
end
# Allows to peek at a number of still unscanned characters without advancing the {#position}.
#
# Returns an empty string when there is nothing to look at, including when the
# position lies beyond the end of the input (the same fallback {#rest} uses).
#
# @param [Integer] length how many characters to look at
# @return [String] the substring, possibly shorter than +length+ near the end of the input
def peek(length = 1)
  # String#[] yields nil for an out-of-range start; normalise that to "".
  @string[@position, length] || ""
end
# Shorthand for accessing {#params}. The key is converted with +to_s+ before the
# lookup, so symbols and strings can be used interchangeably.
def [](key)
  name = key.to_s
  params[name]
end
# (see #params)
# @note The returned Hash is a shallow copy of {#params} (made with +dup+); adding or
#   removing keys on it does not change the scanner's own params.
def to_h
params.dup
end
# Returns a copy of the input string, so modifying the returned String does not
# change what the scanner scans.
#
# @return [String] the input string
# @see #initialize
# @see #<<
def to_s
@string.dup
end
# Counts the whole input, regardless of how much of it has been scanned already.
# @return [Integer] size of the input string
def size
  @string.length
end
# Debug representation showing the class, the current position, the input size and the input itself.
# @!visibility private
def inspect
  format("#<%p %d/%d @ %p>", self.class, @position, @string.size, @string)
end
# Builds (or fetches from the shared cache) the pattern object for the given input.
# Per-call options take precedence over the scanner's default {#pattern_options}.
# @!visibility private
def create_pattern(pattern, options = {})
  effective_options = pattern_options.merge(options)
  PATTERN_CACHE.create_pattern(pattern, effective_options)
end
# Applies the given results to the scanner state: each non-nil result merges its params
# into {#params} and advances the position by its length. The group of results is stored
# as a single history entry, so one {#unscan} reverts all of them together.
#
# @return the last non-nil result, nil if there was none
# @!visibility private
def track_result(*results)
  matches = results.compact
  @history << matches if matches.any?
  matches.each do |match|
    @params.merge!(match.params)
    @position += match.length
  end
  matches.last
end
private :create_pattern, :track_result, :check_until_with_prefix
end
end
|