# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc.
include <mach.h>
include <ctype.h>
include <lexnum.h>
# LEXNUM -- Lexically analyse a character string to determine whether it
# begins with a number and, if so, the type of the number and the number of
# characters it occupies. Leading whitespace is skipped, so the ip_start
# argument is left pointing at the first char of the number (or other token).
# The number of chars in the number is returned as the third argument
# (0 if the token is not a number).
#
# NOTE - See .doc/lexnum.hlp for a description of the states of the automaton.
# Depth of the backtracking stack. This is the declared size of the stk_ip
# and stk_state arrays in lexnum, which record the string index and automaton
# state at every shift so that a later REVERT can back up to them.
define SZ_STACK 15
# Lexical actions. "Reduce" means exit, returning code identifying lexical
# type of token. "Shift" means switch to a new state in the automaton.
# "Revert" means reduce class "other" in the previous state.
#
# The two codes below are negative, which keeps them distinct from the state
# numbers 1..NSTATES that denote a shift. Any other entry of the action table
# is treated by lexnum as a LEX_xxx token code (a reduce); those codes are not
# defined in this file (presumably they come from <lexnum.h> -- confirm).
define ACCEPT -6 # remain in same state
define REVERT -5 # revert to earlier state
# Character classes. lexnum maps each input character to one of these codes
# (lower case letters are folded to upper case first) and uses the code as the
# first subscript of the action table, action[cc,state].
define SIGNCHAR 1 # +-
define OCTDIG 2 # 0-7
define DECDIG 3 # 8-9
define HEXDIG 4 # a-fA-F
define REALEXP 5 # eEdD
define SEXAG 6 # :
define FRACTION 7 # .
define HEXSUFFIX 8 # xX
define OCTSUFFIX 9 # bB
define OTHER 10 # invalid character
# Number of character classes (first dimension of the action table); it
# equals the highest class code defined above (OTHER).
define NCC 10
# States of the automaton. The state number is the second subscript of the
# action table, action[cc,state].
define START 1 # initial state
define UNM 2 # unop or number
define ODH 3 # octal, decimal, hex, or real
define DHR 4 # decimal, hex, or real
define QRF 5 # maybe real fraction
define HEX 6 # hex
define QHX 7 # maybe hex or real exponent
define QRN 8 # maybe real number
define OHN 9 # octal or hex number
define RFR 10 # real fraction
define RRX 11 # real or real exponent
define QRX 12 # maybe real exponent
define HRX 13 # hex or real exponent
define RNM 14 # real number
define REX 15 # real exponent
# Number of states (second dimension of the action table); it equals the
# highest state number defined above (REX). lexnum also uses it to range
# check the state obtained from the table after a shift.
define NSTATES 15
# LEXNUM -- Determine if the next sequence of characters in the string STR
# can be interpreted as a number. Return the numeric type as the function
# value or LEX_NONNUM if the string is not a number.
#
# Arguments:
#   str       string to be scanned.
#   ip_start  on input, the index in str at which to start scanning; it is
#             advanced past any leading whitespace and so is left at the
#             first character of the token.
#   nchars    on output, the number of characters in the number, counted
#             from the updated ip_start; 0 if the token is not a number.
#
# The scan is driven by the table action[NCC,NSTATES], which is brought in
# from "lexdata.inc" and is not visible here. For the current character class
# and state the table yields one of: ACCEPT (consume the character and stay in
# the same state), REVERT (back up to a previously saved state), a LEX_xxx
# token code (stop and return that type), or a new state number (shift).
int procedure lexnum (str, ip_start, nchars)
char str[ARB] # string to be decoded
int ip_start # starting index in string
int nchars # receives nchars in next token
char ch
# stk_ip and stk_state together form the backtracking stack: the string index
# and state saved at each shift, indexed by sp.
int stk_ip[SZ_STACK]
int ip, sp, cc, state, ip_save, toktype, act
short stk_state[SZ_STACK], action[NCC,NSTATES]
int strncmp()
include "lexdata.inc"
begin
# Skip leading whitespace. This modifies the caller's ip_start.
while (IS_WHITE (str[ip_start]))
ip_start = ip_start + 1
ip = ip_start
# INDEF is a legal number and is best dealt with as a special case.
# Only the first five characters are compared, and the automaton is bypassed.
if (str[ip] == 'I')
if (strncmp (str[ip], "INDEF", 5) == 0) {
nchars = 5
return (LEX_REAL)
}
state = START # initialization
ip_save = ip
sp = 0
# Main loop: one pass per table lookup. It is left through the "break"
# statements in the action switch below, each of which sets toktype first.
repeat {
ch = str[ip]
# Map ch to its character class. Every case ends in "break", which leaves
# this inner repeat loop; the only path that goes around again is the one
# that folds a lower case letter to upper case and re-enters the switch.
repeat { # determine character class
switch (ch) {
case '+','-':
cc = SIGNCHAR
break
case '0','1','2','3','4','5','6','7':
cc = OCTDIG
break
case '8','9':
cc = DECDIG
break
case 'B':
cc = OCTSUFFIX
break
case 'D','E':
cc = REALEXP
break
case 'A','C','F':
cc = HEXDIG
break
case ':':
cc = SEXAG
break
case '.':
cc = FRACTION
break
default:
if (IS_LOWER (ch))
ch = TO_UPPER (ch) # and repeat
else if (ch == 'X') {
cc = HEXSUFFIX
break
} else {
# Anything else, including whatever terminates the string, is class OTHER.
cc = OTHER
break
}
}
}
# Disabled debug trace of the automaton (kept for reference).
#call eprintf ("ip=%2d, sp=%2d, ch=%c, cc=%d, state=%d, action=%d\n")
#call pargi(ip); call pargi(sp)
#call pargc(ch); call pargi(cc); call pargi(state)
#call pargs(action[cc,state])
# Perform the action indicated by the action table when this
# class of character is encountered in the current state.
act = action[cc,state]
if (act == ACCEPT) {
ip = ip + 1 # a simple optimization
next
}
switch (act) {
case REVERT:
# Back up: pop saved (ip, state) pairs until reaching a state whose action
# for class OTHER is something other than REVERT, or until the stack is
# used up. That action becomes the token type and ip is restored to the
# position saved with that state.
# NOTE(review): if REVERT were ever looked up with sp == 0 this would read
# stk_ip[0], and if the stack empties while toktype is still REVERT the
# value REVERT itself is returned. Whether either can occur depends on the
# table in lexdata.inc -- confirm.
repeat {
ip = stk_ip[sp]
state = stk_state[sp]
toktype = action[OTHER,state]
sp = sp - 1
} until (toktype != REVERT || sp <= 0)
break
case LEX_OCTAL, LEX_DECIMAL, LEX_HEX, LEX_REAL, LEX_NONNUM:
# Reduce: the table entry is the token type. The current character is not
# part of the token unless it is the octal or hex suffix, which is consumed
# so that it is included in the nchars count.
toktype = action[cc,state]
if (toktype == LEX_OCTAL && cc == OCTSUFFIX)
ip = ip + 1 # discard suffix char
else if (toktype == LEX_HEX && cc == HEXSUFFIX)
ip = ip + 1
break
default: # shift to new state
# Save the current position and state for a possible REVERT, then consume
# the character and enter the state named by the table entry.
sp = sp + 1
if (sp > SZ_STACK) {
# Backtracking stack is full: give up and report a non-number.
toktype = LEX_NONNUM
break
}
stk_ip[sp] = ip
stk_state[sp] = state
ip = ip + 1
state = action[cc,state]
# Sanity check on the table entry: a shift must name a valid state.
if (state < 1 || state > NSTATES)
call error (0, "In LEXNUM: cannot happen")
}
}
# The token length is measured from the first non-white character (ip_save)
# to the final scan position; a non-number reports zero length.
if (toktype == LEX_NONNUM)
nchars = 0
else
nchars = ip - ip_save
return (toktype)
end
|