1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299
|
#!/usr/bin/python3
# coding: utf-8
# This parser starts by parsing all the javascript code, and representing each
# file/object/function as a python class. Afterwards, each object is gone
# through individually to produce the gtk-doc-friendly xml files
#
# The parsers uses the concept of state. At each line, the parser takes a
# particular state, and behaves accordingly. Conversely, the contents of the
# lines can cause the parser to change state.
# In normal text, the parser is in STATE_NORMAL, in which all it does is keep
# track of scopes and the number of '{' and '}' brackets. In a comment block
# that is not documentation, the parser takes STATE_COMMENT and ignores
# everything until we leave the comment.
#
# When the parser sees /**, it enters STATE_INIT, which tells us that the next
# line is important. The next line is usually in the form
#
# * function_name:
#
# or
#
# * #ObjectName:
#
# At this point, we create a JSFunction or JSObject object, and enter the next
# STATE_PROPERTY. Here the parser parses the properties of the form
#
# * @prop (type): description
#
# until it reaches something that is not a property (signified by two line
# breaks not followed by another property). Then the parser drops to
# STATE_DESCRIPTION, and reads every line and puts it into the object's
# description.
#
# STATE_DESCRIPTION continues until the comment block ends, or if it encounters
#
# * Returns (type): description
#
# Then it morphs into STATE_RETURN, and puts the remaining lines into the
# description of the return value. Alternatively, in the case of objects,
#
# * Inherits: Object.Class
#
# specifies the ancestor of an object. If the parser reads this line, it drops
# to STATE_COMMENT because there shouldn't be anything interesting afterwards.
import sys
import os
import re
from gen_lib import *
files = []
objects = {}
ROOT_DIR = os.path.abspath(os.path.dirname(sys.argv[0])) + '/../../../'
if len(sys.argv) > 1:
ROOT_DIR = sys.argv[1]
if len(sys.argv) > 2:
DEST_DIR = sys.argv[2]
os.chdir(DEST_DIR)
JS_UI_DIR = os.path.join(ROOT_DIR, 'js/ui/')
JS_MISC_DIR = os.path.join(ROOT_DIR, 'js/misc/')
# Allow types like "object/string"
COMMENT_REGEX = re.compile(r'/\*([^*]|(\*[^/]))*\*+/')
RETURNS_REGEX = re.compile(r'^Returns\s*\(?(\w*\.?\w+/?\w*\.?\w*)?\)?:(.*)')
INHERITS_REGEX = re.compile(r'^Inherits:\s*(\w*\.?\w+/?\w*\.?\w*)\s*$')
PROPERTY_REGEX = re.compile(r'^@(\w+)\s*\(?(\w*\.?\w+/?\w*\.?\w*)?\)?:(.*)')
FILE_NAME_REGEX = re.compile(r'FILE:\s*(\w+\.js):?')
SIGNAL_NAME_REGEX = re.compile(r'SIGNAL:\s*([\w-]+):?')
ENUM_NAME_REGEX = re.compile(r'ENUM:\s*(\w+):?')
FUNCTION_NAME_REGEX = re.compile(r'^(\w+):?\s*$')
OBJECT_NAME_REGEX = re.compile(r'^#(\w+):?\s*$')
FILE_REGEX = re.compile(r'\w*\.js')
COMMENT_START_REGEX = re.compile(r'^\s*\* ?')
BLOCK_START_REGEX = re.compile(r'^\s*/\*\*\s*$')
STRING_REGEX = re.compile(r'\'[^\']*\'|"[^"]*"')
STATE_NORMAL = 0
STATE_PROPERTY = 1
STATE_DESCRIPTION = 2
STATE_RETURN = 3
STATE_NAME = 4
STATE_INIT = 5
STATE_COMMENT = 6
################################################################################
################################################################################
## The legendary parse function ##
################################################################################
################################################################################
ui_files = [os.path.join(JS_UI_DIR, x) for x in os.listdir(JS_UI_DIR)]
ui_files.sort()
misc_files = [os.path.join(JS_MISC_DIR, x) for x in os.listdir(JS_MISC_DIR)]
misc_files.sort()
_files = ui_files + misc_files
for _file in _files:
parts = _file.split("/")
if not FILE_REGEX.match(parts[-1]):
continue
file_obj = open(_file, 'r', encoding="utf-8")
curr_file = JSFile(parts[-2], parts[-1][:-3])
files.append(curr_file)
bracket_count = 0 # no. of '{' - no. of '}'
# The current object - it is either the top-level file or the JSObject we
# are parsing.
curr_obj = curr_file
# The current item being processed, either the top-level file, description
# of the object or a function
curr_item = curr_file
# The current property, if any
curr_prop = None
state = STATE_NORMAL
scope = ''
for line in file_obj:
################################################################################
# Process all unimportant comments #
################################################################################
# Strip ' * ' at the beginning of each long comment block
if state == STATE_PROPERTY or \
state == STATE_DESCRIPTION or \
state == STATE_RETURN or \
state == STATE_INIT:
line = COMMENT_START_REGEX.sub('', line)
if len(line) > 0 and line[0] == '/':
state = STATE_NORMAL
curr_item = None
continue
# If we are in a (useless) comment, skip unless comment ends in this row
elif state == STATE_COMMENT:
if line.find('*/') == -1:
continue
else:
line = line[line.find('*/') + 2:]
state = STATE_NORMAL
# In normal cases, strip comments if necessary, unless it is the
# beginning of a doc block, in which case we set the STATE_INIT state
else:
if '//' in line:
line = line[:line.find(r'//')]
if '/*' in line:
# Strip all in-line comments, eg. 'asdf /* asdf */ asdf'
line = COMMENT_REGEX.sub('', line)
if BLOCK_START_REGEX.match(line):
state = STATE_INIT
continue
if '/*' in line:
line = line[:line.find(r'/*')]
state = STATE_COMMENT
################################################################################
# Process actual useful content #
################################################################################
if state == STATE_INIT:
if FILE_NAME_REGEX.match(line) and bracket_count == 0:
curr_item = curr_file
curr_obj = curr_file
objects[curr_file.name] = curr_file
state = STATE_PROPERTY
elif OBJECT_NAME_REGEX.match(line) and bracket_count == 0:
curr_item = JSObject(OBJECT_NAME_REGEX.match(line).group(1))
curr_obj = curr_item
objects[curr_file.name + '.' + curr_obj.name] = curr_item
curr_file.add_object(curr_item)
state = STATE_PROPERTY
elif FUNCTION_NAME_REGEX.match(line) and \
((bracket_count == 1 and curr_obj != curr_file) or \
(bracket_count == 0 and curr_obj == curr_file)):
curr_item = JSFunction(FUNCTION_NAME_REGEX.match(line).group(1))
curr_obj.add_function(curr_item)
state = STATE_PROPERTY
elif SIGNAL_NAME_REGEX.match(line) and \
(bracket_count > 0 and curr_obj != curr_file):
curr_item = JSSignal(SIGNAL_NAME_REGEX.match(line).group(1))
curr_obj.add_signal(curr_item)
state = STATE_PROPERTY
elif ENUM_NAME_REGEX.match(line) and bracket_count == 0:
curr_item = JSEnum(ENUM_NAME_REGEX.match(line).group(1))
objects[curr_file.name + '.' + curr_item.name] = curr_item
curr_file.add_enum(curr_item)
state = STATE_PROPERTY
else:
state = STATE_COMMENT
continue
if state == STATE_PROPERTY:
if len(line.strip()) == 0:
if curr_prop is not None:
curr_prop = None
continue
else:
# Ignore blank lines if nothing has happened yet
continue
prop = PROPERTY_REGEX.match(line)
if prop:
curr_prop = JSProperty(*prop.groups())
curr_item.add_property(curr_prop)
else:
if curr_prop:
curr_prop.append_description(line)
else:
# The next block will parse this description properly
state = STATE_DESCRIPTION
if state == STATE_DESCRIPTION:
if RETURNS_REGEX.match(line):
state = STATE_RETURN
curr_prop = JSProperty('Returns', *RETURNS_REGEX.match(line).groups())
curr_item.set_return(curr_prop)
elif INHERITS_REGEX.match(line):
# Anything after the inherit line shouldn't be there
state = STATE_COMMENT
curr_item.set_inherit(INHERITS_REGEX.match(line).group(1))
else:
curr_item.append_description(line)
continue
if state == STATE_RETURN:
curr_prop.append_description(line)
# Here state should be STATE_NORMAL. It might be, in fact,
# STATE_COMMENT, since the line ends with /*, and the state is set to
# STATE_COMMENT for preparation of the next line. However, the
# remaining of the line should be treated as a normal line. If we are
# genuinely inside a comment, we would have `continue`ed at the
# beginning.
if state == STATE_NORMAL or state == STATE_COMMENT:
# the "scope" variable will be last updated before we enter the
# scope, so will be the "function thing() {}" or "thing.prototype =
# {" line. We use this to check if we are parsing the right thing.
if bracket_count == 0:
scope = line
# Don't count the brackets inside strings. STRING_REGEX recognizes
# ' and " but doesn't know if they are escaped. So replace away all
# escaped quotes
line = STRING_REGEX.sub('', line.replace("\\'", "").replace('\\"', ''))
bracket_count += line.count('{') - line.count('}')
if bracket_count == 0:
# Cinnamon-style objects and Lang.Class objects
if curr_obj.orig_name + '.prototype' in scope or \
re.match(curr_obj.orig_name + r'\s*=\s*Lang\.Class', scope):
curr_obj = curr_file
curr_item = curr_file
################################################################################
################################################################################
## Generate the XML ##
################################################################################
################################################################################
write_chapters_file(files)
try:
os.mkdir('ui')
except OSError:
pass
try:
os.mkdir('misc')
except OSError:
pass
for _file in files:
for obj in _file.objects:
create_file(obj)
|