编译器分为三个阶段
- 解析(Parsing) 解析是将最初原始的代码转换为一种更加抽象的表示(即AST)
- 转换(Transformation) 转换将对这个抽象的表示做一些处理,让它能做到编译器期望它做到的事情
- 代码生成(Code Generation) 接收处理之后的代码表示,然后把它转换成新的代码
实现
- 1). 词法分析 将代码 (add 2 2) 转换成 tokens
tokens = [
{type: "paren",value: "("},
{type: "name",value: "add"},
{type: "number", value: "2"},
{type: "number",value: "2"},
{type: "paren",value: ")"}
]
//代码实现 tokenizer.js
// Character classes recognised by the tokenizer.
const LETTERS = /[a-z]/i
const WHITESPACE = /\s/
const NUMBERS = /[0-9]/

/**
 * Splits source text into a flat list of tokens.
 *
 * Recognised input: "(" and ")" (type "paren"), runs of ASCII letters
 * (type "name"), runs of decimal digits (type "number"); whitespace is skipped.
 *
 * @param {string} code - Source text to tokenize.
 * @returns {{type: string, value: string}[]} Tokens in source order.
 * @throws {TypeError} If a character matches none of the recognised classes.
 */
function tokenizer(code) {
  const tokens = []
  let current = 0

  // Consumes the longest run of characters matching `pattern`, starting at `current`.
  const readWhile = (pattern) => {
    let value = ""
    // The bounds check is required: past the end code[current] is undefined,
    // which RegExp#test coerces to the string "undefined" and LETTERS matches.
    while (current < code.length && pattern.test(code[current])) {
      value += code[current]
      current++
    }
    return value
  }

  while (current < code.length) {
    const char = code[current]
    if (char === "(" || char === ")") {
      tokens.push({ type: "paren", value: char })
      current++
    } else if (WHITESPACE.test(char)) {
      // Whitespace only separates tokens; it produces none.
      current++
    } else if (LETTERS.test(char)) {
      tokens.push({ type: "name", value: readWhile(LETTERS) })
    } else if (NUMBERS.test(char)) {
      tokens.push({ type: "number", value: readWhile(NUMBERS) })
    } else {
      // Without this branch `current` would never advance and the loop would spin forever.
      throw new TypeError(`Unexpected character "${char}" at position ${current}`)
    }
  }
  return tokens
}
// Demo: tokenize a sample expression and print the resulting token list.
console.log(tokenizer("(add 23 22)"))
- 2).语法分析 tokens转换成AST语法树 parser.js
/**
 * Builds an AST from a token list.
 *
 * A "(" token opens a CallExpression whose `value` is taken from the token
 * that follows it; every token up to the matching ")" is parsed recursively
 * into `params`. A "number" token becomes a NumericLiteral.
 *
 * @param {{type: string, value: string}[]} program - Tokens to parse.
 * @returns {{type: "Program", body: object[]}} Root node of the AST.
 * @throws {SyntaxError} If the tokens end before an expression is closed.
 * @throws {TypeError} If a token cannot start an expression.
 */
function parser(program) {
  // The root of every tree is a Program node.
  const newAst = { type: "Program", body: [] }
  let current = 0

  // Parses one expression starting at `current` and leaves `current` just past it.
  function walk(tokens) {
    const token = tokens[current]
    if (token === undefined) {
      throw new SyntaxError("Unexpected end of input")
    }

    if (token.type === "number") {
      current++
      return { type: "NumericLiteral", value: token.value }
    }

    if (token.type === "paren" && token.value === "(") {
      // The token right after "(" names the call.
      const callee = tokens[++current]
      if (callee === undefined) {
        throw new SyntaxError('Unexpected end of input after "("')
      }
      const node = { type: "CallExpression", value: callee.value, params: [] }
      current++

      // Collect arguments until the closing ")" of this call.
      for (;;) {
        const next = tokens[current]
        if (next === undefined) {
          throw new SyntaxError(`Missing ")" to close call "${node.value}"`)
        }
        if (next.type === "paren" && next.value === ")") break
        node.params.push(walk(tokens))
      }
      current++ // step over ")"
      return node
    }

    // Failing here keeps callers from looping forever on a token that is never consumed.
    throw new TypeError(`Unexpected token ${token.type} "${token.value}" at index ${current}`)
  }

  while (current < program.length) {
    newAst.body.push(walk(program))
  }
  return newAst
}
// Demo: tokenize a sample expression, parse the tokens, and print the AST as JSON.
const tokens = tokenizer("(add 23 22)")
const ast = parser(tokens)
console.log(JSON.stringify(ast))
/**
 * Walks an AST depth-first, invoking visitor callbacks keyed by node type.
 *
 * A visitor entry may be a function (called on entry) or an object with
 * optional `enter` / `leave` functions. Callbacks receive `(node, parent)`;
 * the root is visited with `parent === null`. Children are read from `body`
 * for Program nodes and from `params` for CallExpression nodes; every other
 * node type is treated as a leaf.
 *
 * @param {object} code - Root node of the tree to walk.
 * @param {object} [visitor={}] - Callbacks keyed by node type.
 */
function traverse(code, visitor = {}) {
  // Property that holds the child list, for each node type that has children.
  const childListKey = new Map([
    ["Program", "body"],
    ["CallExpression", "params"],
  ]);

  const visitAll = (children, owner) => {
    children.forEach((child) => visit(child, owner));
  };

  function visit(current, owner) {
    const handler = visitor[current.type];
    let onEnter;
    let onLeave;
    if (typeof handler === "function") {
      onEnter = handler;
    } else if (typeof handler === "object") {
      onEnter = handler.enter;
      onLeave = handler.leave;
    }

    if (onEnter) onEnter(current, owner);

    const key = childListKey.get(current.type);
    if (key !== undefined) {
      visitAll(current[key], current);
    }

    if (onLeave) onLeave(current, owner);
  }

  visit(code, null);
}
// Demo: walk the parsed AST and log when Program and CallExpression nodes
// are entered and left.
let tokens = tokenizer("(add 23 22)");
let ast = parser(tokens);
traverse(ast, {
// Visitor for the root Program node.
Program: {
enter(node) {
console.log("enter program");
},
leave() {
console.log("leave program");
},
},
// Visitor for CallExpression nodes.
CallExpression: {
enter() {
console.log("enter CallExpression");
},
leave() {
console.log("leave CallExpression");
},
},
});
- 4). 转换ast 语法树 transformer.js
/**
 * Converts the parser's AST into a new tree in which each CallExpression
 * carries a `callee` Identifier and an `arguments` array.
 *
 * The input tree is left untouched: the link from each source node to the
 * array that receives its converted children is kept in a side table
 * instead of being attached to the nodes.
 *
 * @param {object} astTree - Root node produced by `parser`.
 * @returns {{type: "Program", body: object[]}} Root of the new tree.
 */
function transform(astTree) {
  const newAstTree = {
    type: "Program",
    body: [],
  }

  // Source node -> array in the new tree that collects that node's children.
  const targets = new WeakMap([[astTree, newAstTree.body]])

  traverse(astTree, {
    CallExpression(node, parent) {
      const callExpression = {
        type: "CallExpression",
        callee: {
          type: "Identifier",
          name: node.value,
        },
        arguments: [],
      }
      // Register before the children are visited so they land in `arguments`.
      targets.set(node, callExpression.arguments)
      targets.get(parent).push(callExpression)
    },
    NumericLiteral(node, parent) {
      targets.get(parent).push({
        type: "NumericLiteral",
        value: node.value,
      })
    },
  })

  return newAstTree
}
// Demo: tokenize, parse, transform, and print the transformed AST as JSON.
let tokens = tokenizer("(add 23 22)")
let ast = parser(tokens)
let newAst = transform(ast)
console.log(JSON.stringify(newAst))
/**
 * Renders a transformed AST node as source text.
 *
 * Program nodes emit one line per body entry, CallExpression nodes emit
 * `name(arg,arg,...)`, and NumericLiteral nodes emit their `value`.
 *
 * @param {object} ast - Node to render.
 * @returns {string} Generated code for the node and its descendants.
 * @throws {TypeError} If the node's `type` is not one of the three above.
 */
function codeGen(ast) {
  switch (ast.type) {
    case "Program":
      return ast.body.map(codeGen).join("\n");
    case "CallExpression": {
      // A binding named `arguments` is a SyntaxError in strict mode / ES modules,
      // so the property is read under a different local name.
      const args = ast.arguments.map(codeGen).join(",");
      return `${ast.callee.name}(${args})`;
    }
    case "NumericLiteral":
      return ast.value;
    default:
      throw new TypeError(`Unknown node type: ${ast.type}`);
  }
}
// Run the whole pipeline: tokenize -> parse -> transform -> generate code,
// then print the generated code.
const tokens = tokenizer("(add 2 2)")
const ast = parser(tokens)
const newAst = transform(ast)
const finalCode = codeGen(newAst)
console.log(finalCode)