入口
从下面代码可以看出, 解析的重点就是循环地调用parseStatement, 直到遇到type === tt.eof, 此时就完成了解析.
然后再检查未定义的导出(undefinedExports), 并通过adaptDirectivePrologue处理Directive.
/**
 * Entry point of the parse: reads statements into `node.body` until the EOF
 * token is reached, then finishes `node` as the Program node.
 * @param {Node} node The node to populate; `body` is created when missing.
 * @returns {Node} The result of finishing `node` as a Program.
 */
pp.parseTopLevel = function(node) {
  const exportedNames = Object.create(null)
  if (!node.body) {
    node.body = []
  }

  // Core loop: keep parsing statements until the tokenizer reports EOF.
  while (this.type !== tt.eof) {
    const statement = this.parseStatement(null, true, exportedNames)
    node.body.push(statement)
  }

  if (this.inModule) {
    // Per the original note, entries of undefinedExports are recorded by
    // checkLocalExport; each remaining entry names an export of a local
    // binding that was never defined, and is reported here.
    Object.keys(this.undefinedExports).forEach((name) => {
      this.raiseRecoverable(this.undefinedExports[name].start, `Export '${name}' is not defined`)
    })
  }

  this.adaptDirectivePrologue(node.body)
  this.next()
  node.sourceType = this.options.sourceType
  return this.finishNode(node, NodeTypes.Program)
}
parseStatement
解析单条语句(statement), 是每一段解析的入口. 从代码中可以看出, 其操作是根据startType(在Tokenizer中读出并区分的类型)去调用不同的处理函数.
按深度优先的递归的方式读完所有的解析函数, 就算是完成了语法解析的第一部分的解读.
/**
 * Parses a single statement by dispatching on the type of the current token.
 * For example `class node {}`, `let n = new node();` and `const num = 1` are
 * each one standalone statement; statements are delimited by a semicolon,
 * a line break, or braces.
 * @param {null | string} context Describes the enclosing construct (per the
 *   original note: the do/if/while/for/with keywords; "label" is also checked
 *   below). Falsy when there is no such enclosing construct.
 * @param {boolean} topLevel Whether this is a top-level parse. When false,
 *   import and export are rejected unless `allowImportExportEverywhere` is set.
 * @param {Record<string, unknown>} exports Object of exported names, forwarded to parseExport.
 * @returns {Node}
 */
pp.parseStatement = function(context, topLevel, exports) {
  let startType = this.type
  const stmt = this.startNode()
  let declKind

  if (this.isLet(context)) {
    // A `let` declaration is dispatched through the `var` case, with the
    // declaration kind remembered as "let".
    startType = tt._var
    declKind = "let"
  }

  // Most types of statements are recognized by the keyword they
  // start with. Many are trivial to parse, some require a bit of
  // complexity.
  switch (startType) {
    case tt._break:
    case tt._continue:
      return this.parseBreakContinueStatement(stmt, startType.keyword)

    case tt._debugger:
      return this.parseDebuggerStatement(stmt)

    case tt._do:
      return this.parseDoStatement(stmt)

    case tt._for:
      return this.parseForStatement(stmt)

    case tt._function: {
      // Function as sole body of either an if statement or a labeled statement
      // works, but not when it is part of a labeled statement that is the sole
      // body of an if statement.
      const disallowedHere = context && (this.strict || (context !== "if" && context !== "label"))
      if (disallowedHere && this.options.ecmaVersion >= 6) {
        this.unexpected()
      }
      return this.parseFunctionStatement(stmt, false, !context)
    }

    case tt._class:
      if (context) {
        this.unexpected()
      }
      return this.parseClass(stmt, true)

    case tt._if:
      return this.parseIfStatement(stmt)

    case tt._return:
      return this.parseReturnStatement(stmt)

    case tt._switch:
      return this.parseSwitchStatement(stmt)

    case tt._throw:
      return this.parseThrowStatement(stmt)

    case tt._try:
      return this.parseTryStatement(stmt)

    case tt._const:
    case tt._var:
      // `declKind` is already "let" when the let-rewrite above applied.
      if (!declKind) {
        declKind = this.value
      }
      // Only `var` declarations are accepted when a context is given.
      if (context && declKind !== "var") {
        this.unexpected()
      }
      return this.parseVarStatement(stmt, declKind)

    case tt._while:
      return this.parseWhileStatement(stmt)

    case tt._with:
      return this.parseWithStatement(stmt)

    case tt.braceL:
      return this.parseBlock(true, stmt)

    case tt.semi:
      return this.parseEmptyStatement(stmt)

    case tt._export:
    case tt._import: {
      const isImport = startType === tt._import
      if (this.options.ecmaVersion > 10 && isImport) {
        // Peek past whitespace: `import` followed by '(' or '.' is parsed as
        // an expression statement rather than an import declaration.
        skipWhiteSpace.lastIndex = this.pos
        const gap = skipWhiteSpace.exec(this.input)
        const nextPos = this.pos + gap[0].length
        const nextCode = this.input.charCodeAt(nextPos)
        if (nextCode === 40 || nextCode === 46) { // '(' or '.'
          return this.parseExpressionStatement(stmt, this.parseExpression())
        }
      }

      if (!this.options.allowImportExportEverywhere) {
        if (!topLevel) {
          this.raise(this.start, "'import' and 'export' may only appear at the top level")
        }
        if (!this.inModule) {
          this.raise(this.start, "'import' and 'export' may appear only with 'sourceType: module'")
        }
      }

      if (isImport) {
        return this.parseImport(stmt)
      }
      return this.parseExport(stmt, exports)
    }
  }

  // If the statement does not start with a statement keyword or a
  // brace, it's an ExpressionStatement or LabeledStatement. We
  // simply start parsing an expression, and afterwards, if the
  // next token is a colon and the expression was a simple
  // Identifier node, we switch to interpreting it as a label.
  if (this.isAsyncFunction()) {
    if (context) {
      this.unexpected()
    }
    this.next()
    // Original note: `!context` is true here (a truthy context was rejected above).
    return this.parseFunctionStatement(stmt, true, !context)
  }

  const maybeName = this.value
  const expr = this.parseExpression()
  const isLabel = startType === tt.name && expr.type === "Identifier" && this.eat(tt.colon)
  if (isLabel) {
    return this.parseLabeledStatement(stmt, maybeName, expr, context)
  }
  return this.parseExpressionStatement(stmt, expr)
}
简单介绍几个常见的函数作用
- eat: 判断当前token是否为传入的类型, 如果是则调用next并返回true
- insertSemicolon: 判断当前是否处于可插入分号的位置, 具体为判断是否遇到}或者换行, 如果是则返回true
- unexpected: 顾名思义, 抛出错误
- semicolon: 尝试消费一个分号, 如果没有, 则尝试插入一个分号, 如果都不行, 则报错.
parseBreakContinueStatement
解析break和continue的函数很清晰
先执行next获取下一个token, 然后
- 判断是否有分号或者换行, 如果是, 则设置label为null
- 判断当前token的类型是否为name, 如果是, 则将node.label设置为parseIdent解析出来的值, 否则报错
- 对labels进行检查: 对于continue关键词, 要求跳转的目标节点必须是循环; 对于break, 可以不是循环. labels的值可以在后面再看.
/**
 * Parses a `break` or `continue` statement, including its optional label,
 * and verifies that a valid jump target exists in `this.labels`.
 * @param {Node} node The node to fill in and finish.
 * @param {string} keyword The statement keyword; compared against "break"
 *   and included in the error message.
 * @returns {Node}
 */
pp.parseBreakContinueStatement = function(node, keyword) {
  const isBreak = keyword === "break"
  this.next()

  if (this.eat(tt.semi) || this.insertSemicolon()) {
    // Statement ends right after the keyword: there is no label.
    node.label = null
  } else if (this.type === tt.name) {
    node.label = this.parseIdent()
    this.semicolon()
  } else {
    this.unexpected()
  }

  // Verify that there is an actual destination to break or
  // continue to.
  const hasTarget = this.labels.some((lab) => {
    const matches = node.label == null || lab.name === node.label.name
    if (!matches) return false
    // An entry with a kind is a valid target for any `break`, but for
    // `continue` only when that kind is "loop".
    if (lab.kind != null && (isBreak || lab.kind === "loop")) return true
    // A labeled `break` may target a matching label that is not a loop.
    return Boolean(node.label && isBreak)
  })
  if (!hasTarget) {
    this.raise(node.start, "Unsyntactic " + keyword)
  }

  const nodeType = isBreak ? NodeTypes.BreakStatement : NodeTypes.ContinueStatement
  return this.finishNode(node, nodeType)
}
parseIdent
- 首先判断type是否为name, 如果是, 则直接设置node.name
- 判断type.keyword, 如果存在, 则设置node.name 为type.keyword, 并且处理class和function关键词的类型, 如果是的话则弹出一个context.
/**
 * Parses the current token as an Identifier node. A name token supplies its
 * value; a keyword token supplies its keyword text; anything else is reported
 * through `unexpected()`.
 * @param {boolean} liberal When truthy, the reserved-word check and the
 *   `await` position tracking are skipped (original note: used when parsing
 *   properties). Also forwarded, coerced to boolean, to `next()`.
 * @returns {Node} The finished Identifier node.
 */
pp.parseIdent = function(liberal) {
  const ident = this.startNode()

  if (this.type === tt.name) {
    ident.name = this.value
  } else if (this.type.keyword) {
    ident.name = this.type.keyword

    // To fix https://github.com/acornjs/acorn/issues/575
    // `class` and `function` keywords push new context into this.context.
    // But there is no chance to pop the context if the keyword is consumed as an identifier such as a property name.
    // If the previous token is a dot, this does not apply because the context-managing code already ignored the keyword
    if (ident.name === "class" || ident.name === "function") {
      const followsDot =
        this.lastTokEnd === this.lastTokStart + 1 &&
        this.input.charCodeAt(this.lastTokStart) === 46 // 46 is '.'
      // Only the `xxx.class` / `xxx.function` form needs no pop.
      if (!followsDot) {
        this.context.pop()
      }
    }
  } else {
    this.unexpected()
  }

  this.next(Boolean(liberal))
  this.finishNode(ident, "Identifier")
  if (liberal) {
    return ident
  }

  // Validate the identifier and remember the first `await` position seen.
  this.checkUnreserved(ident)
  if (ident.name === "await" && !this.awaitIdentPos) {
    this.awaitIdentPos = ident.start
  }
  return ident
}
parseDoStatement
- 首先在labels中推入一个label, 用于continue和break的校验合法性
- 然后调用parseStatement("do")解析出循环体(通常是{}中的执行体)
- 将之前推入的label从labels中弹出
- 读出while, 然后调用parseParenExpression读出括号内的条件表达式, 结束
/**
 * Parses a `do ... while (...)` statement.
 * @param {Node} node The node to fill in (`body`, `test`) and finish.
 * @returns {Node}
 */
pp.parseDoStatement = function(node) {
  this.next()

  // Register a loop label while the body is parsed so that `break` and
  // `continue` inside it can be validated.
  this.labels.push(loopLabel)
  const loopBody = this.parseStatement("do")
  node.body = loopBody
  this.labels.pop()

  this.expect(tt._while)
  const condition = this.parseParenExpression()
  node.test = condition

  // From ecmaVersion 6 on, a trailing semicolon is consumed only if present;
  // otherwise the regular semicolon handling applies.
  const semicolonOptional = this.options.ecmaVersion >= 6
  if (semicolonOptional) {
    this.eat(tt.semi)
  } else {
    this.semicolon()
  }

  return this.finishNode(node, NodeTypes.DoWhileStatement)
}