接上篇词法分析1
其次看字符串读取
readString 往下读字符串, 直到遇到同样的quote后结束
/**
 * Scans a string literal. Entered with `this.pos` on the opening quote and reads
 * forward until the same quote character code is seen again.
 *
 * Backslash escapes are delegated to `this.readEscapedChar(false)`. U+2028 and
 * U+2029 are accepted inside the literal when `options.ecmaVersion >= 10` (and,
 * when `options.locations` is set, advance the line bookkeeping). Any other
 * character for which `isNewLine` is true, or reaching the end of the input
 * before the closing quote, is reported through `this.raise`.
 *
 * @param {number} quote Character code of the opening quote.
 * @returns {*} Whatever `this.finishToken(tt.string, value)` returns.
 */
pp.readString = function(quote) {
  const unterminated = "Unterminated string constant"
  // Step over the opening quote; the first literal segment starts right after it.
  this.pos += 1
  let segmentStart = this.pos
  let value = ""
  while (true) {
    if (this.pos >= this.input.length) this.raise(this.start, unterminated)
    const code = this.input.charCodeAt(this.pos)
    if (code === quote) break

    if (code === 92) { // '\'
      // Flush the raw text collected so far, then append the decoded escape.
      const literalPart = this.input.slice(segmentStart, this.pos)
      value += literalPart + this.readEscapedChar(false)
      segmentStart = this.pos
      continue
    }

    if (code === 0x2028 || code === 0x2029) {
      if (this.options.ecmaVersion < 10) this.raise(this.start, unterminated)
      this.pos += 1
      if (this.options.locations) {
        // These separators still count as line breaks for location tracking.
        this.curLine += 1
        this.lineStart = this.pos
      }
      continue
    }

    if (isNewLine(code)) this.raise(this.start, unterminated)
    this.pos += 1
  }
  // Append the trailing segment, then step past the closing quote.
  value += this.input.slice(segmentStart, this.pos)
  this.pos += 1
  return this.finishToken(tt.string, value)
}
其次是根据不同的字符, 选择不同的函数调用
// Excerpt of a character-code dispatch (the enclosing switch statement is not shown).
// Each case returns the token produced by the reader for that operator family;
// readers that serve more than one character receive the character code as `code`.
case 47: // '/'
return this.readToken_slash()
case 37: case 42: // '%*'
return this.readToken_mult_modulo_exp(code)
case 124: case 38: // '|&'
return this.readToken_pipe_amp(code)
case 94: // '^'
return this.readToken_caret()
case 43: case 45: // '+-'
return this.readToken_plus_min(code)
case 60: case 62: // '<>'
return this.readToken_lt_gt(code)
case 61: case 33: // '=!'
return this.readToken_eq_excl(code)
case 63: // '?'
return this.readToken_question()
case 126: // '~'
return this.finishOp(tt.prefix, 1)
case 35: // '#': in newer ECMAScript versions this starts a private name.
return this.readToken_numberSign()
比较常见的是readToken_question
/**
 * Tokenizes an operator that starts with `?`, with `this.pos` on the `?`.
 *
 * With `options.ecmaVersion >= 11`:
 *   - `?.` followed by a character that is not a digit 0-9 -> `tt.questionDot`
 *     (the digit check keeps input such as `a?.5:b` a conditional expression);
 *     a `?.` with nothing after the dot is not treated as `tt.questionDot`
 *   - `??=` -> `tt.assign` (only when `ecmaVersion >= 12`)
 *   - `??`  -> `tt.coalesce`
 * Anything else is a lone `?` -> `tt.question` (conditional operator).
 *
 * @returns {*} Whatever `this.finishOp` returns for the recognized operator.
 */
pp.readToken_question = function() { // '?'
  const version = this.options.ecmaVersion
  if (version >= 11) {
    const second = this.input.charCodeAt(this.pos + 1)
    const third = this.input.charCodeAt(this.pos + 2)
    switch (second) {
      case 46: // '.'
        // Written as two comparisons on purpose: a NaN `third` (end of input)
        // must fail both and fall through to the plain `?` token.
        if (third < 48 || third > 57) return this.finishOp(tt.questionDot, 2)
        break
      case 63: // '?'
        if (version >= 12 && third === 61) return this.finishOp(tt.assign, 3) // ??=
        return this.finishOp(tt.coalesce, 2) // ??
    }
  }
  return this.finishOp(tt.question, 1) // conditional operator
}
接下来继续看readWord
/**
 * Reads one identifier-like word via `this.readWord1()` and finishes it as a token.
 * When `this.keywords` matches the word, the token type is looked up in
 * `keywordTypes`; otherwise the word is finished as a plain `tt.name`.
 *
 * NOTE(review): the original note says `let` is handled separately and comes back
 * as a name; that depends on what `this.keywords` matches, which is not visible
 * here - confirm.
 *
 * @returns {*} Whatever `this.finishToken(type, word)` returns.
 */
pp.readWord = function() {
  const identifier = this.readWord1()
  const tokenType = this.keywords.test(identifier) ? keywordTypes[identifier] : tt.name
  return this.finishToken(tokenType, identifier)
}
/**
 * Reads an identifier-like word starting at `this.pos` and returns its text with
 * any `\u` escapes decoded. Leaves `this.pos` just past the word and sets
 * `this.containsEsc` to whether an escape was encountered.
 *
 * An escape must begin with `\u`, and the decoded code point must be valid for
 * its position (`isIdentifierStart` for the first character, `isIdentifierChar`
 * afterwards); violations are reported through `this.invalidStringToken`.
 *
 * @returns {string} The word, escapes resolved.
 */
pp.readWord1 = function() {
  this.containsEsc = false
  const allowAstral = this.options.ecmaVersion >= 6
  let segmentStart = this.pos
  let text = ""
  // `atStart` is true only while examining the very first character of the word.
  for (let atStart = true; this.pos < this.input.length; atStart = false) {
    const code = this.fullCharCodeAtPos()

    if (isIdentifierChar(code, allowAstral)) { // ordinary identifier character
      // Code points above 0xffff occupy two UTF-16 units in the input.
      this.pos += code <= 0xffff ? 1 : 2
      continue
    }

    if (code !== 92) break // anything other than "\" ends the word

    this.containsEsc = true
    // Flush the raw characters collected before the escape.
    text += this.input.slice(segmentStart, this.pos)
    const escapeStart = this.pos
    this.pos += 1
    if (this.input.charCodeAt(this.pos) !== 117) { // "u"
      this.invalidStringToken(this.pos, "Expecting Unicode escape sequence \\uXXXX")
    }
    this.pos += 1
    const codePoint = this.readCodePoint()
    const isAllowedHere = atStart ? isIdentifierStart : isIdentifierChar
    if (!isAllowedHere(codePoint, allowAstral)) {
      this.invalidStringToken(escapeStart, "Invalid Unicode escape")
    }
    text += codePointToString(codePoint)
    segmentStart = this.pos
  }
  return text + this.input.slice(segmentStart, this.pos)
}
总结
- 在readToken的时候, 首先判断当前字符是否是合法的变量名开头(字母、`$` 或 `_`), 如果是, 则调用readWord读出keyword或者变量名
- 如果不是变量名开头, 则调用getTokenFromCode读出对应的类型, 函数内部根据不同的字符, 分别调用其他函数解析出对应的Token类型.