File: xml-parser.lua

package info (click to toggle)
lua-gtk 0.9%2B20100528-2
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 2,176 kB
  • ctags: 1,934
  • sloc: ansic: 9,571; sh: 373; makefile: 241
file content (402 lines) | stat: -rwxr-xr-x 11,380 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
-- vim:sw=4:sts=4
--
-- The actual parser for types.xml.
--

-- Lua Expat Binding - expat is an XML parser library
-- It is available in Debian as the package liblua5.1-expat0.  Links:
--  http://www.keplerproject.org/luaexpat/
--  http://www.luaforge.net/projects/luaexpat/

-- Module table.  Names that are not found in M are looked up in the real
-- globals through the __index metamethod.
local M = {}
local lxp = require "lxp"
setmetatable(M, {__index=_G})
-- From here on M is the environment of this chunk: every assignment to a
-- non-local name below creates or updates a field of M.
setfenv(1, M)

-- Prototype table currently being filled in; set by the Function and
-- FunctionType handlers, extended by Argument and Ellipsis.
local curr_func = nil
-- ID of the most recently seen Enumeration; recorded with each EnumValue.
local curr_enum = nil
-- The lxp parser object; only set while parse_xml() is running.
local parser = nil
-- The input line most recently handed to the parser (shown by parse_error).
local xml_curr_line = nil
-- Name of the file being parsed (shown by parse_error).
local input_file_name = nil

-- NOTE(review): parse_error() increments "parse_errors", which is not
-- initialised in this block; presumably it is provided elsewhere -- confirm.

funclist = {}	-- [name] = [ [rettype,"retval","fileid"],
		--   [arg1type, arg1name], ...]
typedefs = {
    ["vararg"] = { type="fundamental", name="vararg", fname="vararg", size=0 },
} -- [id] = { type=..., name=..., struct=... }
  -- struct = { name, size, align, members, _type, fields } (same for enum)
enum_values = {}    -- [name] = { val, context }
globals = {}	    -- [name] = {...}  (attribute table of the Variable element)
filelist = {}	-- [id] = "full path"

-- Largest "offset" and "bits" attribute seen on any Field element; reported
-- by show_statistics().
max_bit_offset = 0
max_bit_length = 0

-- Fields whose type is replaced while parsing.
-- ["StructName.fieldname"] = { "type name" }; do_override() caches the
-- resolved type ID in slot [2] of the entry.
local type_override = {
    ["GtkObject.flags"] = { "GtkWidgetFlags" },
}

---
-- Display an error message with the current XML parsing position.
--
-- Prints "<file>(<line>): <message>" followed by the XML line currently
-- being parsed, and increments the parse_errors counter.  Once more than 20
-- errors have been counted the process is terminated with exit status 1.
--
-- @param ...  format string and arguments, as accepted by string.format
--
local function parse_error(...)
    -- parser:pos() also yields the column, but only the line is reported.
    local line = parser:pos()
    print(string.format("%s(%d): %s", input_file_name, line,
	string.format(...)))
    print(xml_curr_line)

    -- The counter is not initialised in the visible part of this file; start
    -- from zero instead of failing with "arithmetic on a nil value" when the
    -- very first error is reported.
    parse_errors = (parse_errors or 0) + 1
    if parse_errors > 20 then
	print("Too many errors, exiting.")
	os.exit(1)
    end
end


---
-- Check that every listed attribute is present in the table "el".
--
-- Each missing attribute is reported through parse_error(); the remaining
-- names are still checked after a miss.
--
-- @param el   attribute table of an XML element
-- @param ...  names of the required attributes
-- @return  false when all attributes are present, true when any is missing
--
local function check_fields(el, ...)
    local required = { ... }
    local missing = false
    local idx, count = 1, select('#', ...)
    while idx <= count do
	local attr = required[idx]
	if not el[attr] then
	    parse_error("missing attribute %s", attr)
	    missing = true
	end
	idx = idx + 1
    end
    return missing
end

---
-- Resolve a type_override entry to a type ID.
--
-- The entry names the type to use (slot 1), but callers need the ID under
-- which that type is stored in "typedefs".  No separate name index is kept,
-- so the typedefs table is searched; the ID that is found is cached in
-- slot 2 of the entry for later calls.
--
-- @param ov  override entry: { type name [, cached type ID] }
-- @return  the type ID, or nothing (after printing a warning) when no type
--   with that name is known yet
--
local function do_override(ov)
    local cached = ov[2]
    if cached then
	return cached
    end

    local wanted = ov[1]
    local type_id, def = next(typedefs)
    while type_id do
	if def.name == wanted then
	    ov[2] = type_id
	    return type_id
	end
	type_id, def = next(typedefs, type_id)
    end

    print("Override type not found:", wanted)
end


---
-- Handle Struct and Union declarations.
--
-- Registers the type in "typedefs" under its ID and, when the element's
-- context is another known type (anything but "_1"), also records it in the
-- field list of that enclosing type.
--
-- @param p  The parser (unused)
-- @param el  Element information (attribute table); el.size is set to 0 for
--   incomplete types
-- @param what  "struct" or "union"
--
local function xml_struct_union(p, el, what)

    local my_name = el.name or el.demangled
    if not my_name then
	parse_error("%s without name or demangled attribute", what)
	return
    end

    if check_fields(el, "id") then return end

    -- The leading "_" that structures and unions seem to have is deliberately
    -- kept in the name.

    local members = {}
    if el.incomplete then
	el.size = 0
    elseif not el.size then
	parse_error("%s %s without size", what, my_name)
	return
    elseif not el.members then
	parse_error("%s %s without member list", what, my_name)
	return
    else
	-- el.members is a list of IDs; collect them in order.
	for member_id in string.gmatch(el.members, "[_0-9]+") do
	    table.insert(members, member_id)
	end
    end

    local struct = {
	name = my_name,
	size = el.size,		-- total size in bits (0 for incomplete types)
	align = el.align,
	members = members,	-- list (in order) of the IDs in fields
	_type = what,
	fields = {},		-- [ID] = { name, offset, ... }
    }
    typedefs[el.id] = { type=what, name=my_name, struct=struct,
	file_id=el.file }

    -- Nested inside another structure?  If so, hook it in there.
    local parent_id = el.context
    if not parent_id or parent_id == "_1" then return end

    local parent = typedefs[parent_id]
    if not parent then
	print("Union/Structure for unknown structure " .. parent_id)
	return
    end

    local parent_struct = parent.struct
    assert(not parent_struct.fields[el.id], "repeated ID " .. el.id .. " in "
	.. my_name)
    parent_struct.fields[el.id] = { type=what, id=el.id }
end



-- Shared handler for members that are not used but still have to be recorded
-- (constructors, destructors, operator methods).  The member is stored in the
-- field list of the type named by el.context, and the current function is
-- reset so that following Argument elements are not attached to anything.
--
-- @param p  The parser (unused)
-- @param el  Element information (attribute table)
-- @param name  Kind of member; stored as the field's type
local function ignore_member(p, el, name)
    if check_fields(el, "id", "context") then return end

    local owner = typedefs[el.context]
    if owner then
	local owner_struct = owner.struct
	owner_struct.fields[el.id] = { type=name, name=el.name or el.demangled }
	curr_func = nil
    else
	parse_error(name .. " for unknown structure %s", el.context)
    end
end


---
-- Handlers for the XML elements of the input, indexed by element name.
-- Each handler is called as handler(p, el): p is the parser and el is the
-- table of attributes of the element.  Handlers report problems through
-- parse_error() and then simply return.
--
local xml_tags = {

    -- not interested in namespaces.
    Namespace = function(p, el)
    end,

    -- store functions with their prototype.  In this XML tag there already
    -- is the type of the return value, additional arguments are appended
    -- one by one.  The file ID where this function is defined is "hidden"
    -- in the otherwise unused prototype[1][3].
    Function = function(p, el)
	if check_fields(el, "name", "returns", "file") then return end
	-- Functions with hidden visibility are skipped, and so are their
	-- arguments (curr_func is cleared).
	if el.attributes and string.match(el.attributes, "visibility%(hidden%)") then
	    curr_func = nil
	    return
	end
	-- Names listed in config.lib are skipped as well.
	if config.lib[el.name] then
	    curr_func = nil
	    return
	end
	curr_func = { { el.returns, "retval", el.file } }
	funclist[el.name] = curr_func
    end,

    -- store the argument's type and the name.  Unnamed arguments are called
    -- "arg_N"; as entry 1 of the prototype is the return value, N is the
    -- 1-based position of the argument.
    Argument = function(p, el)
	if not curr_func then return end
	if check_fields(el, "type") then return end
	local name = el.name or string.format("arg_%d", #curr_func)
	curr_func[#curr_func + 1] = { el.type, name }
    end,

    -- translated to vararg argument later
    Ellipsis = function(p, el)
	if curr_func then
	    curr_func[#curr_func + 1] = { "vararg", "vararg" }
	end
    end,

    -- Declare a type being a function prototype.  These don't have names or
    -- file_id, but there always is a PointerType and then a Typedef somewhere
    -- with a name and file_id, which will then be filled in.
    -- Such function types are NOT added to funclist - which is indexed
    -- by name, and after all this is just a type and not an actual function
    -- you could call.
    FunctionType = function(p, el)
	if check_fields(el, "id", "returns") then return end
	curr_func = { { el.returns, "retval", nil } }
	typedefs[el.id] = { type="func", prototype=curr_func, id=el.id }
    end,

    -- Not interested much in constructors.  Store anyway to avoid
    -- dangling references.
    Constructor = function(p, el)
	return ignore_member(p, el, "constructor")
    end,

    -- Ignore these items, they are not relevant but are emitted by
    -- newer versions of gccxml (starting with 20090701)
    Destructor = function(p, el)
	return ignore_member(p, el, "destructor")
    end,

    OperatorMethod = function(p, el)
	return ignore_member(p, el, "operatormethod")
    end,

    -- structures and unions
    Struct = function(p, el)
	if el.name == "_cairo" then el.name = "cairo" end
	return xml_struct_union(p, el, "struct")
    end,

    Union = function(p, el) return xml_struct_union(p, el, "union") end,

    -- member of a structure
    Field = function(p, el)
	-- el.bits is optional.
	if check_fields(el, "id", "context", "name", "type", "offset")
	    then return end
	local t = typedefs[el.context]
	if not t then
	    parse_error("Field for unknown structure %s", el.context)
	    return
	end
	local st = t.struct
	local override = type_override[st.name .. "." .. el.name]
	if override then
	    -- do_override() returns nothing (after printing a warning) when
	    -- the replacement type is not known; keep the declared type in
	    -- that case instead of storing a field without a type.
	    el.type = do_override(override) or el.type
	end
	st.fields[el.id] = { name=el.name, type=el.type, offset=el.offset,
	    size=el.bits }
	max_bit_offset = math.max(max_bit_offset, el.offset)
	-- in most cases, no bit length is given; mostly it derives from the
	-- referenced type.
	if el.bits then
	    max_bit_length = math.max(max_bit_length, el.bits)
	end
    end,

    -- global variables; the complete attribute table is kept.
    Variable = function(p, el)
	if check_fields(el, "name", "file") then return end
	globals[el.name] = el
    end,

    -- declare an alternative name for another type
    Typedef = function(p, el)
	if check_fields(el, "id", "context", "name", "type", "file") then
	    return
	end
	if el.context ~= "_1" then
	    print("Warning: typedef context is " .. el.context)
	end

	-- cairo uses a _t suffix for all types (except for _cairo); remove
	-- that.  Otherwise, the functions can't be found, e.g.
	-- cairo_surface_t_status() doesn't exist!
	-- The pattern is anchored at the end so that only a real "_t" suffix
	-- is stripped; a name that merely contains "_t" (like
	-- "cairo_test_func") is left alone.
	if string.match(el.name, "^cairo.*_t$") then
	    el.name = string.sub(el.name, 1, -3)
	    -- print("rename", el.name)
	end
	typedefs[el.id] = { type="typedef", name=el.name, what=el.type,
	    file_id=el.file }
    end,

    -- value of an enumeration; it is attributed to the most recently seen
    -- Enumeration element.
    EnumValue = function(p, el)
	if check_fields(el, "name", "init") then return end
	enum_values[el.name] = { val=tonumber(el.init), context=curr_enum }
    end,

    -- declare a type being an enum
    Enumeration = function(p, el)
	if check_fields(el, "id", "name", "size", "align") then return end
	typedefs[el.id] = { type="enum", name=el.name, size=el.size,
	    align=el.align, file_id=el.file }
	curr_enum = el.id
    end,

    -- declare a type being a pointer to another type
    PointerType = function(p, el)
	if check_fields(el, "id", "type", "size", "align") then return end
	typedefs[el.id] = { type="pointer", what=el.type, size=el.size,
	    align=el.align }
    end,

    FundamentalType = function(p, el)
	-- size is optional (for void)
	if check_fields(el, "id", "name", "align") then return end
	-- "local" matters here: without it the table would be stored as
	-- field "t" of the module environment.
	local t = { type="fundamental", fname=el.name, size=el.size,
	    align=el.align, pointer=0 }
	    -- useless element: fid=fid
	types.register_fundamental(t)
	typedefs[el.id] = t
	if not el.size and el.name ~= "void" then
	    parse_error("fundamental type %s without size", el.name)
	end
    end,

    -- wrapper for another type adding qualifiers: const, restrict, volatile
    CvQualifiedType = function(p, el)
	if check_fields(el, "id", "type") then return end
	typedefs[el.id] = { type="qualifier", what=el.type,
	    restrict=el.restrict, const=el.const, volatile=el.volatile }
    end,

    -- array of another type.  Only the leading digits of "max" are used;
    -- when there are none, max is stored as 0.
    ArrayType = function(p, el)
	if check_fields(el, "id", "min", "max", "align", "type") then return end
	local max = tonumber(string.match(el.max, "^(%d+)")) or 0
	typedefs[el.id] = { type="array", min=el.min, max=max,
	    align=el.align, what=el.type }
    end,

    -- a function parameter that is passed by reference; only used in the
    -- automatically generated constructors and destructors, which are not
    -- relevant.
    ReferenceType = function(p, el)
    end,

    -- Associate file names (including full path) to the file IDs.  This is
    -- used later to filter out relevant defines, which are identified by
    -- the path of the files.
    File = function(p, el)
	-- A missing id would otherwise raise "table index is nil".
	if check_fields(el, "id", "name") then return end
	filelist[el.id] = el.name
    end,

}


-- Element names that were already reported as unhandled, so that each one
-- is mentioned only once.
local unhandled = {}	    -- [name] = true

---
-- StartElement callback used once the GCC_XML root element has been seen.
-- Dispatches to the handler in xml_tags and returns whatever it returns;
-- element names without a handler are reported once.
--
-- @param p  The parser
-- @param name  Name of the XML element
-- @param el  Attribute table of the element
--
local function regular_parser(p, name, el)
    local handler = xml_tags[name]
    if handler then
	return handler(p, el)
    end

    if unhandled[name] then return end
    print("Unhandled XML element " .. name)
    unhandled[name] = true
end


-- Initial StartElement callback: every element is skipped until the GCC_XML
-- element shows up; from then on regular_parser handles the elements.
local function look_for_gcc_xml(p, name, el)
    if name ~= "GCC_XML" then return end
    callbacks.StartElement = regular_parser
end


---
-- Read the given XML file
--
-- The file is fed to the parser line by line so that parse_error() can show
-- the line a problem was found in.  An XML syntax error is reported through
-- parse_error() and ends the parsing; everything collected up to that point
-- is kept.
--
-- Raises an error when the file cannot be opened.
--
-- @param xml_file filename (with path) of the input file
--
function parse_xml(xml_file)
    -- Open the file before creating the parser, so that nothing is left
    -- behind when the file is missing.
    local fh, open_err = io.open(xml_file, "r")
    if not fh then
	error(open_err, 2)
    end

    input_file_name = xml_file
    callbacks = { StartElement = look_for_gcc_xml }
    parser = lxp.new(callbacks, "::")

    -- parse() returns a non-nil value on success and nil, msg, line, col, pos
    -- on an XML error.
    local ok, msg, _, col = true
    for l in fh:lines() do
	xml_curr_line = l
	ok, msg, _, col = parser:parse(l)
	if ok then
	    ok, msg, _, col = parser:parse("\n")
	end
	if not ok then break end
    end
    fh:close()

    if ok then
	ok, msg, _, col = parser:parse()	    -- close document
    end
    if not ok then
	parse_error("XML syntax error at column %s: %s", tostring(col),
	    tostring(msg))
    end

    parser:close()	    -- close parser
    parser = nil
    callbacks = nil
end

-- Report the largest bit offset and bit length seen on any Field element,
-- using info_num().
function show_statistics()
    local stats = {
	{ "Max. bit offset", max_bit_offset },
	{ "Max. bit length", max_bit_length },
    }
    for _, stat in ipairs(stats) do
	info_num(stat[1], stat[2])
    end
end

return M