1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221
|
module Puppet::Pops
module Parser
require_relative '../../../puppet/util/multi_match'
# This is an integral part of the Lexer. It is broken out into a separate module
# for maintainability of the code, and making the various parts of the lexer focused.
#
module LexerSupport
# Returns "<eof>" when the scanner is at end of input; otherwise a preview
# of the next (up to) five characters followed by "...", with tab, newline
# and carriage return rendered as the two-character escapes \t, \n and \r.
def followed_by
  return "<eof>" if @scanner.eos?
  preview = "#{@scanner.rest[0, 5]}..."
  # One pass over the preview, replacing each control character with its
  # printable escape form.
  preview.gsub(/[\t\n\r]/, "\t" => '\t', "\n" => '\n', "\r" => '\r')
end
# Returns a display form of the quote character +q+: a single-quote
# character is wrapped in double quotes, anything else in single quotes.
def format_quote(q)
  q == "'" ? '"\'"' : "'#{q}'"
end
# Raises a Puppet::ParseErrorWithIssue carrying the formatted issue message
# and issue code, but no file/line/pos information.
#
# @param issue [Issues::Issue] the issue to report
# @param args [Hash<Symbol,String>] arguments used to format the issue message
def lex_error_without_pos(issue, args = {})
  message = issue.format(args)
  raise Puppet::ParseErrorWithIssue.new(message, nil, nil, nil, nil, issue.issue_code, args)
end
# Builds a Puppet::ParseErrorWithIssue via #create_lex_error (which adds
# file/line/pos information) and raises it.
#
# @param issue [Issues::Issue] the issue to report
# @param args [Hash<Symbol,String>] arguments used to format the issue message
# @param pos [Integer, nil] source offset; nil means "use the scanner position"
def lex_error(issue, args = {}, pos = nil)
  error = create_lex_error(issue, args, pos)
  raise error
end
# The name of the file being lexed (from the locator), or nil when the
# locator has no usable file (not a String, or an empty String).
def filename
  name = @locator.file
  return nil unless name.is_a?(String)
  name.empty? ? nil : name
end
# Returns the locator's line for +pos+, or for the scanner's current
# position when +pos+ is nil.
def line(pos)
  offset = pos || @scanner.pos
  @locator.line_for_offset(offset)
end
# Returns the locator's position-on-line for +pos+, or for the scanner's
# current position when +pos+ is nil.
def position(pos)
  offset = pos || @scanner.pos
  @locator.pos_on_line(offset)
end
# Emits a warning-level log entry for the given issue, annotated with the
# current file and the line/pos derived from +pos+ (or the scanner position
# when +pos+ is nil). Does not raise.
def lex_warning(issue, args = {}, pos = nil)
  entry = {
    :level      => :warning,
    :message    => issue.format(args),
    :issue_code => issue.issue_code,
    :file       => filename,
    :line       => line(pos),
    :pos        => position(pos),
  }
  Puppet::Util::Log.create(entry)
end
# Builds (without raising) a parse error carrying the formatted issue
# message plus the file/line/pos of the given offset.
#
# @param issue [Issues::Issue] the issue
# @param args [Hash<Symbol,String>] Issue arguments
# @param pos [Integer] source offset; nil means "use the scanner position"
# @return [Puppet::ParseErrorWithIssue] the created error
def create_lex_error(issue, args = {}, pos = nil)
  message = issue.format(args)
  Puppet::ParseErrorWithIssue.new(message, filename, line(pos), position(pos), nil, issue.issue_code, args)
end
# Asserts that the given string value is a float, or an integer in decimal,
# octal or hex form. An error is raised if the given value does not comply.
#
# The patterns are anchored with \A/\z rather than ^/$: in Ruby, ^ and $
# match at line boundaries, so a value with an embedded newline (e.g.
# "12\njunk") would otherwise pass validation because its first line looks
# like a valid number.
#
# @param value [String] the lexed numeric text to validate
# @param pos [Integer] source offset used when reporting an error
def assert_numeric(value, pos)
  if value =~ /\A0[xX]/
    lex_error(Issues::INVALID_HEX_NUMBER, {:value => value}, pos) unless value =~ /\A0[xX][0-9A-Fa-f]+\z/
  elsif value =~ /\A0[^.]/
    lex_error(Issues::INVALID_OCTAL_NUMBER, {:value => value}, pos) unless value =~ /\A0[0-7]+\z/
  elsif value =~ /\A\d+[eE.]/
    lex_error(Issues::INVALID_DECIMAL_NUMBER, {:value => value}, pos) unless value =~ /\A\d+(?:\.\d+)?(?:[eE]-?\d+)?\z/
  else
    lex_error(Issues::ILLEGAL_NUMBER, {:value => value}, pos) unless value =~ /\A\d+\z/
  end
end
# A TokenValue keeps track of the token symbol, the lexed text for the token,
# its length and its position in its source container. There is a cost
# associated with computing the line and position on line information, which
# is why those are derived lazily via the locator in #[].
#
class TokenValue < Locatable
  attr_reader :token_array
  attr_reader :offset
  attr_reader :locator

  def initialize(token_array, offset, locator)
    @token_array = token_array
    @offset = offset
    @locator = locator
  end

  # The token length as recorded in slot 2 of the token array.
  def length
    @token_array[2]
  end

  # Hash-like accessor for token attributes. Line and pos are computed on
  # demand from the locator; unknown keys yield nil.
  def [](key)
    case key
    when :value   then @token_array[1]
    when :file    then @locator.file
    when :line    then @locator.line_for_offset(@offset)
    when :pos     then @locator.pos_on_line(@offset)
    when :length  then @token_array[2]
    when :locator then @locator
    when :offset  then @offset
    end
  end

  # This format is very compact and is intended for debugging output from
  # racc parser in debug mode. If this is made more elaborate the output
  # from a debug run becomes very hard to read.
  #
  def to_s
    "'#{self[:value]} #{@token_array[0]}'"
  end

  # TODO: Make this comparable for testing
  # vs symbolic, vs array with symbol and non hash, array with symbol and hash)
  #
end
# Short aliases for the byte-pattern matcher used to recognize BOMs.
MM = Puppet::Util::MultiMatch
MM_ANY = MM::NOT_NIL
# Byte patterns for known byte order marks / encoding signatures. Each
# pattern covers the four bytes sampled by #get_bom; MM_ANY positions accept
# any (non nil) byte so that two- and three-byte marks can be matched
# against a four-byte sample.
BOM_UTF_8 = MM.new(0xEF, 0xBB, 0xBF, MM_ANY)
BOM_UTF_16_1 = MM.new(0xFE, 0xFF, MM_ANY, MM_ANY)
BOM_UTF_16_2 = MM.new(0xFF, 0xFE, MM_ANY, MM_ANY)
BOM_UTF_32_1 = MM.new(0x00, 0x00, 0xFE, 0xFF )
BOM_UTF_32_2 = MM.new(0xFF, 0xFE, 0x00, 0x00 )
BOM_UTF_1 = MM.new(0xF7, 0x64, 0x4C, MM_ANY)
BOM_UTF_EBCDIC = MM.new(0xDD, 0x73, 0x66, 0x73 )
BOM_SCSU = MM.new(0x0E, 0xFE, 0xFF, MM_ANY)
BOM_BOCU = MM.new(0xFB, 0xEE, 0x28, MM_ANY)
BOM_GB_18030 = MM.new(0x84, 0x31, 0x95, 0x33 )
# Longest BOM above is four bytes, which is why #get_bom samples four bytes.
LONGEST_BOM = 4
# Raises an ILLEGAL_BOM lex error when +content+ starts with one of the
# known byte order marks / encoding signatures above; returns nil otherwise.
#
# NOTE(review): matching is delegated to the case expression, i.e. to
# MultiMatch#=== — presumably a positional byte comparison where MM_ANY
# matches anything; confirm against Puppet::Util::MultiMatch.
#
# The order of the `when` clauses is significant: BOM_UTF_32_2
# (FF FE 00 00) must be tested before BOM_UTF_16_2 (FF FE xx xx), since the
# UTF-16 pattern also matches the first two bytes of the UTF-32 LE mark.
def assert_not_bom(content)
# name = encoding name for the error message, size = number of bytes the
# matched BOM actually occupies (of the four sampled).
name, size =
case bom = get_bom(content)
when BOM_UTF_32_1, BOM_UTF_32_2
['UTF-32', 4]
when BOM_GB_18030
['GB-18030', 4]
when BOM_UTF_EBCDIC
['UTF-EBCDIC', 4]
when BOM_SCSU
['SCSU', 3]
when BOM_UTF_8
['UTF-8', 3]
when BOM_UTF_1
['UTF-1', 3]
when BOM_BOCU
['BOCU', 3]
when BOM_UTF_16_1, BOM_UTF_16_2
['UTF-16', 2]
else
# No known BOM at the start of content — nothing to report.
return
end
# Show only the bytes belonging to the matched BOM, in hex, e.g. "[EF BB BF]".
lex_error_without_pos(
Puppet::Pops::Issues::ILLEGAL_BOM,
{ :format_name => name,
:bytes => "[#{bom.values[0,size].map {|b| "%X" % b}.join(" ")}]"
})
end
# Samples the first four bytes of +content+ (enough for the longest BOM)
# and wraps them in a MultiMatch so they can be compared against the BOM_*
# patterns. String#getbyte is used because a BOM's byte sequence is not a
# sequence of legal characters on most platforms; getbyte returns nil when
# the content is too short, in which case a space stands in for the byte.
def get_bom(content)
  sampled = (0..3).map { |index| content.getbyte(index) || ' ' }
  MM.new(*sampled)
end
end
end
end
|