1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359
|
/* Parse function for JavaScript. Makes use of the tokenizer from
* tokenizejavascript.js. Note that your parsers do not have to be
* this complicated -- if you don't want to recognize local variables,
* in many languages it is enough to just look for braces, semicolons,
* parentheses, etc, and know when you are inside a string or comment.
*
* See manual.html for more info about the parser interface.
*/
var JSParser = Editor.Parser = (function() {
// Token types that can be considered to be atoms.
var atomicTypes = {"atom": true, "number": true, "variable": true, "string": true, "regexp": true};
// Setting that can be used to have JSON data indent properly.
var json = false;
// Constructor for the lexical context objects.
function JSLexical(indented, column, type, align, prev, info) {
// indentation at start of this line
this.indented = indented;
// column at which this scope was opened
this.column = column;
// type of scope ('vardef', 'stat' (statement), 'form' (special form), '[', '{', or '(')
this.type = type;
// '[', '{', or '(' blocks that have any text after their opening
// character are said to be 'aligned' -- any lines below are
// indented all the way to the opening character.
if (align != null)
this.align = align;
// Parent scope, if any.
this.prev = prev;
this.info = info;
}
// My favourite JavaScript indentation rules.
function indentJS(lexical) {
return function(firstChars) {
var firstChar = firstChars && firstChars.charAt(0), type = lexical.type;
var closing = firstChar == type;
if (type == "vardef")
return lexical.indented + 4;
else if (type == "form" && firstChar == "{")
return lexical.indented;
else if (type == "stat" || type == "form")
return lexical.indented + indentUnit;
else if (lexical.info == "switch" && !closing)
return lexical.indented + (/^(?:case|default)\b/.test(firstChars) ? indentUnit : 2 * indentUnit);
else if (lexical.align)
return lexical.column - (closing ? 1 : 0);
else
return lexical.indented + (closing ? 0 : indentUnit);
};
}
// The parser-iterator-producing function itself.
function parseJS(input, basecolumn) {
// Wrap the input in a token stream
var tokens = tokenizeJavaScript(input);
// The parser state. cc is a stack of actions that have to be
// performed to finish the current statement. For example we might
// know that we still need to find a closing parenthesis and a
// semicolon. Actions at the end of the stack go first. It is
// initialized with an infinitely looping action that consumes
// whole statements.
var cc = [json ? expressions : statements];
// Context contains information about the current local scope, the
// variables defined in that, and the scopes above it.
var context = null;
// The lexical scope, used mostly for indentation.
var lexical = new JSLexical((basecolumn || 0) - indentUnit, 0, "block", false);
// Current column, and the indentation at the start of the current
// line. Used to create lexical scope objects.
var column = 0;
var indented = 0;
// Variables which are used by the mark, cont, and pass functions
// below to communicate with the driver loop in the 'next'
// function.
var consume, marked;
// The iterator object.
var parser = {next: next, copy: copy};
function next(){
// Start by performing any 'lexical' actions (adjusting the
// lexical variable), or the operations below will be working
// with the wrong lexical state.
while(cc[cc.length - 1].lex)
cc.pop()();
// Fetch a token.
var token = tokens.next();
// Adjust column and indented.
if (token.type == "whitespace" && column == 0)
indented = token.value.length;
column += token.value.length;
if (token.content == "\n"){
indented = column = 0;
// If the lexical scope's align property is still undefined at
// the end of the line, it is an un-aligned scope.
if (!("align" in lexical))
lexical.align = false;
// Newline tokens get an indentation function associated with
// them.
token.indentation = indentJS(lexical);
}
// No more processing for meaningless tokens.
if (token.type == "whitespace" || token.type == "comment")
return token;
// When a meaningful token is found and the lexical scope's
// align is undefined, it is an aligned scope.
if (!("align" in lexical))
lexical.align = true;
// Execute actions until one 'consumes' the token and we can
// return it.
while(true) {
consume = marked = false;
// Take and execute the topmost action.
cc.pop()(token.type, token.content);
if (consume){
// Marked is used to change the style of the current token.
if (marked)
token.style = marked;
// Here we differentiate between local and global variables.
else if (token.type == "variable" && inScope(token.content))
token.style = "js-localvariable";
return token;
}
}
}
// This makes a copy of the parser state. It stores all the
// stateful variables in a closure, and returns a function that
// will restore them when called with a new input stream. Note
// that the cc array has to be copied, because it is contantly
// being modified. Lexical objects are not mutated, and context
// objects are not mutated in a harmful way, so they can be shared
// between runs of the parser.
function copy(){
var _context = context, _lexical = lexical, _cc = cc.concat([]), _tokenState = tokens.state;
return function copyParser(input){
context = _context;
lexical = _lexical;
cc = _cc.concat([]); // copies the array
column = indented = 0;
tokens = tokenizeJavaScript(input, _tokenState);
return parser;
};
}
// Helper function for pushing a number of actions onto the cc
// stack in reverse order.
function push(fs){
for (var i = fs.length - 1; i >= 0; i--)
cc.push(fs[i]);
}
// cont and pass are used by the action functions to add other
// actions to the stack. cont will cause the current token to be
// consumed, pass will leave it for the next action.
function cont(){
push(arguments);
consume = true;
}
function pass(){
push(arguments);
consume = false;
}
// Used to change the style of the current token.
function mark(style){
marked = style;
}
// Push a new scope. Will automatically link the current scope.
function pushcontext(){
context = {prev: context, vars: {"this": true, "arguments": true}};
}
// Pop off the current scope.
function popcontext(){
context = context.prev;
}
// Register a variable in the current scope.
function register(varname){
if (context){
mark("js-variabledef");
context.vars[varname] = true;
}
}
// Check whether a variable is defined in the current scope.
function inScope(varname){
var cursor = context;
while (cursor) {
if (cursor.vars[varname])
return true;
cursor = cursor.prev;
}
return false;
}
// Push a new lexical context of the given type.
function pushlex(type, info) {
var result = function(){
lexical = new JSLexical(indented, column, type, null, lexical, info)
};
result.lex = true;
return result;
}
// Pop off the current lexical context.
function poplex(){
if (lexical.type == ")")
indented = lexical.indented;
lexical = lexical.prev;
}
poplex.lex = true;
// The 'lex' flag on these actions is used by the 'next' function
// to know they can (and have to) be ran before moving on to the
// next token.
// Creates an action that discards tokens until it finds one of
// the given type.
function expect(wanted){
return function expecting(type){
if (type == wanted) cont();
else if (wanted == ";") pass();
else cont(arguments.callee);
};
}
// Looks for a statement, and then calls itself.
function statements(type){
return pass(statement, statements);
}
function expressions(type){
return pass(expression, expressions);
}
// Dispatches various types of statements based on the type of the
// current token.
function statement(type){
if (type == "var") cont(pushlex("vardef"), vardef1, expect(";"), poplex);
else if (type == "keyword a") cont(pushlex("form"), expression, statement, poplex);
else if (type == "keyword b") cont(pushlex("form"), statement, poplex);
else if (type == "{") cont(pushlex("}"), block, poplex);
else if (type == ";") cont();
else if (type == "function") cont(functiondef);
else if (type == "for") cont(pushlex("form"), expect("("), pushlex(")"), forspec1, expect(")"), poplex, statement, poplex);
else if (type == "variable") cont(pushlex("stat"), maybelabel);
else if (type == "switch") cont(pushlex("form"), expression, pushlex("}", "switch"), expect("{"), block, poplex, poplex);
else if (type == "case") cont(expression, expect(":"));
else if (type == "default") cont(expect(":"));
else if (type == "catch") cont(pushlex("form"), pushcontext, expect("("), funarg, expect(")"), statement, poplex, popcontext);
else pass(pushlex("stat"), expression, expect(";"), poplex);
}
// Dispatch expression types.
function expression(type){
if (atomicTypes.hasOwnProperty(type)) cont(maybeoperator);
else if (type == "function") cont(functiondef);
else if (type == "keyword c") cont(expression);
else if (type == "(") cont(pushlex(")"), expression, expect(")"), poplex, maybeoperator);
else if (type == "operator") cont(expression);
else if (type == "[") cont(pushlex("]"), commasep(expression, "]"), poplex, maybeoperator);
else if (type == "{") cont(pushlex("}"), commasep(objprop, "}"), poplex, maybeoperator);
else cont();
}
// Called for places where operators, function calls, or
// subscripts are valid. Will skip on to the next action if none
// is found.
function maybeoperator(type, value){
if (type == "operator" && /\+\+|--/.test(value)) cont(maybeoperator);
else if (type == "operator") cont(expression);
else if (type == ";") pass();
else if (type == "(") cont(pushlex(")"), commasep(expression, ")"), poplex, maybeoperator);
else if (type == ".") cont(property, maybeoperator);
else if (type == "[") cont(pushlex("]"), expression, expect("]"), poplex, maybeoperator);
}
// When a statement starts with a variable name, it might be a
// label. If no colon follows, it's a regular statement.
function maybelabel(type){
if (type == ":") cont(poplex, statement);
else pass(maybeoperator, expect(";"), poplex);
}
// Property names need to have their style adjusted -- the
// tokenizer thinks they are variables.
function property(type){
if (type == "variable") {mark("js-property"); cont();}
}
// This parses a property and its value in an object literal.
function objprop(type){
if (type == "variable") mark("js-property");
if (atomicTypes.hasOwnProperty(type)) cont(expect(":"), expression);
}
// Parses a comma-separated list of the things that are recognized
// by the 'what' argument.
function commasep(what, end){
function proceed(type) {
if (type == ",") cont(what, proceed);
else if (type == end) cont();
else cont(expect(end));
}
return function commaSeparated(type) {
if (type == end) cont();
else pass(what, proceed);
};
}
// Look for statements until a closing brace is found.
function block(type){
if (type == "}") cont();
else pass(statement, block);
}
// Variable definitions are split into two actions -- 1 looks for
// a name or the end of the definition, 2 looks for an '=' sign or
// a comma.
function vardef1(type, value){
if (type == "variable"){register(value); cont(vardef2);}
else cont();
}
function vardef2(type, value){
if (value == "=") cont(expression, vardef2);
else if (type == ",") cont(vardef1);
}
// For loops.
function forspec1(type){
if (type == "var") cont(vardef1, forspec2);
else if (type == ";") pass(forspec2);
else if (type == "variable") cont(formaybein);
else pass(forspec2);
}
function formaybein(type, value){
if (value == "in") cont(expression);
else cont(maybeoperator, forspec2);
}
function forspec2(type, value){
if (type == ";") cont(forspec3);
else if (value == "in") cont(expression);
else cont(expression, expect(";"), forspec3);
}
function forspec3(type) {
if (type == ")") pass();
else cont(expression);
}
// A function definition creates a new context, and the variables
// in its argument list have to be added to this context.
function functiondef(type, value){
if (type == "variable"){register(value); cont(functiondef);}
else if (type == "(") cont(pushcontext, commasep(funarg, ")"), statement, popcontext);
}
function funarg(type, value){
if (type == "variable"){register(value); cont();}
}
return parser;
}
return {
make: parseJS,
electricChars: "{}:",
configure: function(obj) {
if (obj.json != null) json = obj.json;
}
};
})();
|