// (c) Microsoft Corporation 2005-2009.
#nowarn "47" // recursive initialization of LexBuffer
#if INTERNALIZED_POWER_PACK
namespace Internal.Utilities.Text.Lexing
#else
namespace Microsoft.FSharp.Text.Lexing
#endif
open System.Collections.Generic
// REVIEW: This type showed up on a parsing-intensive performance measurement. Consider whether it can be a struct-record later when we have this feature. -jomo
#if INTERNALIZED_POWER_PACK
type internal Position =
#else
type Position =
#endif
    { pos_fname : string;
      pos_lnum : int;
#if INTERNALIZED_POWER_PACK
      pos_orig_lnum : int;
#endif
      pos_bol : int;
      pos_cnum : int; }

    member x.FileName = x.pos_fname
    member x.Line = x.pos_lnum
#if INTERNALIZED_POWER_PACK
    member x.OriginalLine = x.pos_orig_lnum
#endif
    member x.Char = x.pos_cnum
    member x.AbsoluteOffset = x.pos_cnum
    member x.StartOfLine = x.pos_bol
    member x.StartOfLineAbsoluteOffset = x.pos_bol
    member x.Column = x.pos_cnum - x.pos_bol

    member pos.NextLine =
        { pos with
#if INTERNALIZED_POWER_PACK
            pos_orig_lnum = pos.OriginalLine + 1;
#endif
            pos_lnum = pos.Line + 1;
            pos_bol = pos.AbsoluteOffset }

    member pos.EndOfToken(n) = { pos with pos_cnum = pos.pos_cnum + n }
    member pos.AsNewLinePos() = pos.NextLine
    member pos.ShiftColumnBy(by) = { pos with pos_cnum = pos.pos_cnum + by }

    static member Empty =
        { pos_fname = "";
          pos_lnum = 0;
#if INTERNALIZED_POWER_PACK
          pos_orig_lnum = 0;
#endif
          pos_bol = 0;
          pos_cnum = 0 }

    static member FirstLine(filename) =
        { pos_fname = filename;
#if INTERNALIZED_POWER_PACK
          pos_orig_lnum = 1;
#endif
          pos_lnum = 1;
          pos_bol = 0;
          pos_cnum = 0 }
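
// Illustrative usage (comment only, not part of the library): a typical fslex rule
// records a newline by advancing the end position, e.g.
//
//     let newline (lexbuf: LexBuffer<_>) =
//         lexbuf.EndPos <- lexbuf.EndPos.NextLine
//
// After NextLine, Column (pos_cnum - pos_bol) restarts at 0 for the new line, while
// AbsoluteOffset keeps counting characters from the start of the input.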
#if INTERNALIZED_POWER_PACK
type internal LexBufferFiller<'char> =
#else
type LexBufferFiller<'char> =
#endif
    { fillSync : (LexBuffer<'char> -> unit) option
      fillAsync : (LexBuffer<'char> -> Async<unit>) option }

and [<Sealed>]
#if INTERNALIZED_POWER_PACK
    internal LexBuffer<'char>(filler: LexBufferFiller<'char>) as this =
#else
    LexBuffer<'char>(filler: LexBufferFiller<'char>) as this =
#endif
    let context = new Dictionary<string,obj>(1) in
    let extendBufferSync = (fun () -> match filler.fillSync with Some refill -> refill this | None -> invalidOp "attempt to read synchronously from an asynchronous lex buffer")
    let extendBufferAsync = (fun () -> match filler.fillAsync with Some refill -> refill this | None -> invalidOp "attempt to read asynchronously from a synchronous lex buffer")
    let mutable buffer = [||];
    /// number of valid characters beyond bufferScanStart
    let mutable bufferMaxScanLength = 0;
    /// count into the buffer when scanning
    let mutable bufferScanStart = 0;
    /// number of characters scanned so far
    let mutable bufferScanLength = 0;
    /// length of the scan at the last accepting state
    let mutable lexemeLength = 0;
    /// action related to the last accepting state
    let mutable bufferAcceptAction = 0;
    let mutable eof = false;
    let mutable startPos = Position.Empty;
    let mutable endPos = Position.Empty

    // Throw away all the input besides the lexeme
    let discardInput () =
        let keep = Array.sub buffer bufferScanStart bufferScanLength
        let nkeep = keep.Length
        Array.blit keep 0 buffer 0 nkeep;
        bufferScanStart <- 0;
        bufferMaxScanLength <- nkeep
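
    // Buffer bookkeeping, illustrated (comment only, derived from the fields above).
    // At any point during a scan:
    //
    //     0 .. bufferScanStart-1                                : input already consumed by earlier tokens
    //     bufferScanStart .. bufferScanStart+bufferScanLength-1 : characters scanned for the current token
    //     .. bufferScanStart+bufferMaxScanLength-1              : further characters already read from the source
    //     beyond that                                           : unused space in 'buffer'
    //
    // BufferScanPos = bufferScanStart + bufferScanLength is the next character the
    // tables will examine; discardInput slides the scanned region back to index 0.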
    member lexbuf.EndOfScan () : int =
        // Printf.eprintf "endOfScan, lexBuffer.lexemeLength = %d\n" lexBuffer.lexemeLength;
        if bufferAcceptAction < 0 then
            failwith "unrecognized input"
        // Printf.printf "endOfScan %d state %d on unconsumed input '%c' (%d)\n" a s (Char.chr inp) inp;
        // Printf.eprintf "accept, lexeme = %s\n" (lexeme lexBuffer);
        lexbuf.StartPos <- endPos;
        lexbuf.EndPos <- endPos.EndOfToken(lexbuf.LexemeLength);
        bufferAcceptAction

    member lexbuf.StartPos
        with get() = startPos
        and set(b) = startPos <- b

    member lexbuf.EndPos
        with get() = endPos
        and set(b) = endPos <- b

    member lexbuf.Lexeme = Array.sub buffer bufferScanStart lexemeLength
    member lexbuf.LexemeChar(n) = buffer.[n+bufferScanStart]
    member lexbuf.BufferLocalStore = (context :> IDictionary<_,_>)
    member lexbuf.LexemeLength with get() : int = lexemeLength and set v = lexemeLength <- v
    member internal lexbuf.Buffer with get() : 'char[] = buffer and set v = buffer <- v
    member internal lexbuf.BufferMaxScanLength with get() = bufferMaxScanLength and set v = bufferMaxScanLength <- v
    member internal lexbuf.BufferScanLength with get() = bufferScanLength and set v = bufferScanLength <- v
    member internal lexbuf.BufferScanStart with get() : int = bufferScanStart and set v = bufferScanStart <- v
    member internal lexbuf.BufferAcceptAction with get() = bufferAcceptAction and set v = bufferAcceptAction <- v
    member internal lexbuf.RefillBuffer = extendBufferSync
    member internal lexbuf.AsyncRefillBuffer = extendBufferAsync

    static member LexemeString(lexbuf:LexBuffer<char>) =
        new System.String(lexbuf.Buffer,lexbuf.BufferScanStart,lexbuf.LexemeLength)

    member lexbuf.IsPastEndOfStream
        with get() = eof
        and set(b) = eof <- b

    member lexbuf.DiscardInput() = discardInput ()

    member x.BufferScanPos = bufferScanStart + bufferScanLength

    member lexbuf.EnsureBufferSize n =
        if lexbuf.BufferScanPos + n >= buffer.Length then
            let repl = Array.zeroCreate (lexbuf.BufferScanPos + n)
            Array.blit buffer bufferScanStart repl bufferScanStart bufferScanLength;
            buffer <- repl
    static member FromReadFunctions (syncRead : ('char[] * int * int -> int) option, asyncRead : ('char[] * int * int -> Async<int>) option) : LexBuffer<'char> =
        let extension = Array.zeroCreate 4096
        let fillers =
            { fillSync =
                match syncRead with
                | None -> None
                | Some read ->
                    Some (fun lexBuffer ->
                        let n = read(extension,0,extension.Length)
                        lexBuffer.EnsureBufferSize n;
                        Array.blit extension 0 lexBuffer.Buffer lexBuffer.BufferScanPos n;
                        lexBuffer.BufferMaxScanLength <- lexBuffer.BufferScanLength + n);
              fillAsync =
                match asyncRead with
                | None -> None
                | Some read ->
                    Some (fun lexBuffer ->
                        async {
                            let! n = read(extension,0,extension.Length)
                            lexBuffer.EnsureBufferSize n;
                            Array.blit extension 0 lexBuffer.Buffer lexBuffer.BufferScanPos n;
                            lexBuffer.BufferMaxScanLength <- lexBuffer.BufferScanLength + n }) }
        new LexBuffer<_>(fillers)

    // A full type signature is required on this method because it is used at more specific types within its own scope
    static member FromFunction (f : 'char[] * int * int -> int) : LexBuffer<'char> = LexBuffer<_>.FromReadFunctions(Some(f),None)
    static member FromAsyncFunction (f : 'char[] * int * int -> Async<int>) : LexBuffer<'char> = LexBuffer<_>.FromReadFunctions(None,Some(f))

    static member FromCharFunction f : LexBuffer<char> =
        LexBuffer<char>.FromFunction(fun (buff,start,len) ->
            let buff2 = Array.zeroCreate len
            let n = f buff2 len
            Array.blit buff2 0 buff start len
            n)

    static member FromByteFunction f : LexBuffer<byte> =
        LexBuffer<byte>.FromFunction(fun (buff,start,len) ->
            let buff2 = Array.zeroCreate len
            let n = f buff2 len
            Array.blit buff2 0 buff start len
            n)

    // A full type signature is required on this method because it is used at more specific types within its own scope
    static member FromArray (s: 'char[]) : LexBuffer<'char> =
        let lexBuffer =
            new LexBuffer<_>
                { fillSync = Some (fun _ -> ());
                  fillAsync = Some (fun _ -> async { return () }) }
        let buffer = Array.copy s
        lexBuffer.Buffer <- buffer;
        lexBuffer.BufferMaxScanLength <- buffer.Length;
        lexBuffer

    static member FromBytes (arr) = LexBuffer<byte>.FromArray(arr)
    static member FromChars (arr) = LexBuffer<char>.FromArray(arr)
    static member FromString (s:string) = LexBuffer<char>.FromChars (s.ToCharArray())

    static member FromTextReader (tr:System.IO.TextReader) : LexBuffer<char> =
        LexBuffer<char>.FromFunction(tr.Read)

    static member FromBinaryReader (br:System.IO.BinaryReader) : LexBuffer<byte> =
        LexBuffer<byte>.FromFunction(br.Read)

    static member FromStream (stream:System.IO.Stream) : LexBuffer<byte> =
        LexBuffer<byte>.FromReadFunctions(Some(stream.Read),Some(fun (buf,offset,len) -> stream.AsyncRead(buf,offset=offset,count=len)))
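
// Illustrative usage (comment only, not part of the library): constructing a LexBuffer
// and reading back the matched text. The 'Lexer.token' rule name is hypothetical,
// standing in for a rule generated by fslex.
//
//     let lexbuf = LexBuffer<char>.FromString "let x = 1"
//     lexbuf.EndPos <- Position.FirstLine "input.fsx"
//     // let tok  = Lexer.token lexbuf                     // run a generated rule
//     // let text = LexBuffer<char>.LexemeString lexbuf    // text of the last match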
module GenericImplFragments =
    let startInterpret(lexBuffer:LexBuffer<_>) =
        lexBuffer.BufferScanStart <- lexBuffer.BufferScanStart + lexBuffer.LexemeLength;
        lexBuffer.BufferMaxScanLength <- lexBuffer.BufferMaxScanLength - lexBuffer.LexemeLength;
        lexBuffer.BufferScanLength <- 0;
        lexBuffer.LexemeLength <- 0;
        lexBuffer.BufferAcceptAction <- -1;

    let afterRefill (trans: uint16[] array,sentinel,lexBuffer:LexBuffer<_>,scanUntilSentinel,endOfScan,state,eofPos) =
        // end of file occurs if we couldn't extend the buffer
        if lexBuffer.BufferScanLength = lexBuffer.BufferMaxScanLength then
            let snew = int trans.[state].[eofPos] // == EOF
            if snew = sentinel then
                endOfScan()
            else
                if lexBuffer.IsPastEndOfStream then failwith "End of file on lexing stream";
                lexBuffer.IsPastEndOfStream <- true;
                // Printf.printf "state %d --> %d on eof\n" state snew;
                scanUntilSentinel(lexBuffer,snew)
        else
            scanUntilSentinel(lexBuffer, state)

    let onAccept (lexBuffer:LexBuffer<_>,a) =
        lexBuffer.LexemeLength <- lexBuffer.BufferScanLength;
        lexBuffer.BufferAcceptAction <- a;
open GenericImplFragments
[<Sealed>]
#if INTERNALIZED_POWER_PACK
type internal AsciiTables(trans: uint16[] array, accept: uint16[]) =
#else
type AsciiTables(trans: uint16[] array, accept: uint16[]) =
#endif
    let rec scanUntilSentinel(lexBuffer, state) =
        let sentinel = 255 * 256 + 255
        // Return an endOfScan after consuming the input
        let a = int accept.[state]
        if a <> sentinel then
            onAccept (lexBuffer,a)

        if lexBuffer.BufferScanLength = lexBuffer.BufferMaxScanLength then
            lexBuffer.DiscardInput();
            lexBuffer.RefillBuffer ();
            // end of file occurs if we couldn't extend the buffer
            afterRefill (trans,sentinel,lexBuffer,scanUntilSentinel,lexBuffer.EndOfScan,state,256 (* == EOF *) )
        else
            // read a character - end the scan if there are no further transitions
            let inp = int(lexBuffer.Buffer.[lexBuffer.BufferScanPos])
            let snew = int trans.[state].[inp]
            if snew = sentinel then
                lexBuffer.EndOfScan()
            else
                lexBuffer.BufferScanLength <- lexBuffer.BufferScanLength + 1;
                // Printf.printf "state %d --> %d on '%c' (%d)\n" state snew (Char.chr inp) inp;
                scanUntilSentinel(lexBuffer, snew)

    /// Interpret tables for an ascii lexer generated by fslex.
    member tables.Interpret(initialState,lexBuffer : LexBuffer<byte>) =
        startInterpret(lexBuffer)
        scanUntilSentinel(lexBuffer, initialState)
    /// Asynchronously interpret tables for an ascii lexer generated by fslex.
    member tables.AsyncInterpret(initialState,lexBuffer : LexBuffer<byte>) =
        let rec scanUntilSentinel(lexBuffer,state) : Async<int> =
            async {
                let sentinel = 255 * 256 + 255
                // Return an endOfScan after consuming the input
                let a = int accept.[state]
                if a <> sentinel then
                    onAccept (lexBuffer,a)

                if lexBuffer.BufferScanLength = lexBuffer.BufferMaxScanLength then
                    lexBuffer.DiscardInput();
                    do! lexBuffer.AsyncRefillBuffer ()
                    // end of file occurs if we couldn't extend the buffer
                    return! afterRefill (trans,sentinel,lexBuffer,scanUntilSentinel,endOfScan,state,256 (* == EOF *) )
                else
                    // read a character - end the scan if there are no further transitions
                    let inp = int(lexBuffer.Buffer.[lexBuffer.BufferScanPos])
                    let snew = int trans.[state].[inp]
                    if snew = sentinel then
                        return! endOfScan()
                    else
                        lexBuffer.BufferScanLength <- lexBuffer.BufferScanLength + 1;
                        return! scanUntilSentinel(lexBuffer,snew)
            }
        and endOfScan() =
            async { return lexBuffer.EndOfScan() }

        startInterpret(lexBuffer)
        scanUntilSentinel(lexBuffer, initialState)

    static member Create(trans,accept) = new AsciiTables(trans,accept)
[<Sealed>]
#if INTERNALIZED_POWER_PACK
type internal UnicodeTables(trans: uint16[] array, accept: uint16[]) =
#else
type UnicodeTables(trans: uint16[] array, accept: uint16[]) =
#endif
    let sentinel = 255 * 256 + 255
    let numUnicodeCategories = 30
    let numLowUnicodeChars = 128
    let numSpecificUnicodeChars = (trans.[0].Length - 1 - numLowUnicodeChars - numUnicodeCategories)/2

    let lookupUnicodeCharacters (state,inp) =
        let inpAsInt = int inp
        // Is it a fast ASCII character?
        if inpAsInt < numLowUnicodeChars then
            int trans.[state].[inpAsInt]
        else
            // Search for a specific unicode character
            let baseForSpecificUnicodeChars = numLowUnicodeChars
            let rec loop i =
                if i >= numSpecificUnicodeChars then
                    // OK, if we failed then read the 'others' entry in the alphabet,
                    // which covers all Unicode characters not covered in other
                    // ways
                    let baseForUnicodeCategories = numLowUnicodeChars+numSpecificUnicodeChars*2
                    let unicodeCategory = System.Char.GetUnicodeCategory(inp)
                    //System.Console.WriteLine("inp = {0}, unicodeCategory = {1}", [| box inp; box unicodeCategory |]);
                    int trans.[state].[baseForUnicodeCategories + int32 unicodeCategory]
                else
                    // This is the specific unicode character
                    let c = char (int trans.[state].[baseForSpecificUnicodeChars+i*2])
                    //System.Console.WriteLine("c = {0}, inp = {1}, i = {2}", [| box c; box inp; box i |]);
                    // OK, have we found the entry for a specific unicode character?
                    if c = inp
                    then int trans.[state].[baseForSpecificUnicodeChars+i*2+1]
                    else loop(i+1)
            loop 0

    let eofPos = numLowUnicodeChars + 2*numSpecificUnicodeChars + numUnicodeCategories
    let rec scanUntilSentinel(lexBuffer,state) =
        // Return an endOfScan after consuming the input
        let a = int accept.[state]
        if a <> sentinel then
            onAccept(lexBuffer,a)

        if lexBuffer.BufferScanLength = lexBuffer.BufferMaxScanLength then
            lexBuffer.DiscardInput();
            lexBuffer.RefillBuffer ();
            // end of file occurs if we couldn't extend the buffer
            afterRefill (trans,sentinel,lexBuffer,scanUntilSentinel,lexBuffer.EndOfScan,state,eofPos)
        else
            // read a character - end the scan if there are no further transitions
            let inp = lexBuffer.Buffer.[lexBuffer.BufferScanPos]
            // Find the new state
            let snew = lookupUnicodeCharacters (state,inp)
            if snew = sentinel then
                lexBuffer.EndOfScan()
            else
                lexBuffer.BufferScanLength <- lexBuffer.BufferScanLength + 1;
                // Printf.printf "state %d --> %d on '%c' (%d)\n" s snew (char inp) inp;
                scanUntilSentinel(lexBuffer,snew)
    // Each row for the Unicode table has format
    //      128 entries for ASCII characters
    //      A variable number of 2*UInt16 entries for SpecificUnicodeChars
    //      30 entries, one for each UnicodeCategory
    //      1 entry for EOF
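    //
    // Worked example (comment only, the row length below is hypothetical): with
    // trans.[0].Length = 169, the definitions above give
    //      numSpecificUnicodeChars = (169 - 1 - 128 - 30) / 2 = 5
    //      eofPos                  = 128 + 2*5 + 30          = 168  (the final entry)
    // so lookupUnicodeCharacters consults indexes 0..127 for ASCII, 128..137 as five
    // (char, target-state) pairs, 138..167 one per UnicodeCategory, and 168 for EOF.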
    member tables.Interpret(initialState,lexBuffer : LexBuffer<char>) =
        startInterpret(lexBuffer)
        scanUntilSentinel(lexBuffer, initialState)
    member tables.AsyncInterpret(initialState,lexBuffer : LexBuffer<char>) =
        let rec scanUntilSentinel(lexBuffer, state) =
            async {
                // Return an endOfScan after consuming the input
                let a = int accept.[state]
                if a <> sentinel then
                    onAccept(lexBuffer,a)

                if lexBuffer.BufferScanLength = lexBuffer.BufferMaxScanLength then
                    lexBuffer.DiscardInput();
                    // NOTE (assumed fix): refill asynchronously here, matching the ASCII
                    // AsyncInterpret path above; the synchronous RefillBuffer would throw
                    // for a buffer created with only an asynchronous reader.
                    do! lexBuffer.AsyncRefillBuffer ()
                    // end of file occurs if we couldn't extend the buffer
                    return! afterRefill (trans,sentinel,lexBuffer,scanUntilSentinel,endOfScan,state,eofPos)
                else
                    // read a character - end the scan if there are no further transitions
                    let inp = lexBuffer.Buffer.[lexBuffer.BufferScanPos]
                    // Find the new state
                    let snew = lookupUnicodeCharacters (state,inp)
                    if snew = sentinel then
                        return! endOfScan()
                    else
                        lexBuffer.BufferScanLength <- lexBuffer.BufferScanLength + 1;
                        return! scanUntilSentinel(lexBuffer, snew)
            }
        and endOfScan() =
            async { return lexBuffer.EndOfScan() }

        startInterpret(lexBuffer)
        scanUntilSentinel(lexBuffer, initialState)

    static member Create(trans,accept) = new UnicodeTables(trans,accept)
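
// Illustrative sketch (comment only, not part of the library): fslex-generated lexers
// package their transition and accept tables and drive them through Interpret. The
// table contents, the start state 0, and the rule body below are hypothetical
// placeholders for what fslex emits.
//
//     let trans  : uint16[] array = [| (* one row per DFA state, emitted by fslex *) |]
//     let accept : uint16[]       = [| (* accept action per state, emitted by fslex *) |]
//     let tables = UnicodeTables.Create(trans, accept)
//
//     let rec token (lexbuf: LexBuffer<char>) =
//         match tables.Interpret(0, lexbuf) with
//         | 0 -> (* semantic action for rule 0, e.g. skip whitespace *) token lexbuf
//         | n -> (* semantic action n, typically building a parser token *) n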