JS实现一个简单的 (add 2 2) => add(2, 2) 转换的编译器

282 阅读1分钟

编译器分为三个阶段

  • 解析(Parsing) 解析是将最初原始的代码转换为一种更加抽象的表示(即AST)
  • 转换(Transformation) 转换将对这个抽象的表示做一些处理,让它能做到编译器期望它做到的事情
  • 代码生成(Code Generation) 接收处理之后的代码表示,然后把它转换成新的代码

实现

  • 1). 词法分析
将  (add 2 2) 转换成 
tokens = [
	{type: "paren",value: "("}, 
	{type: "name",value: "add"}, 
	{type: "number", value: "2"}, 
	{type: "number",value: "2"}, 
	{type: "paren",value: ")"}
]
// Implementation: tokenizer.js
const LETTERS = /[a-z]/i;
const WHITESPACE = /\s/;
const NUMBERS = /[0-9]/;

/**
 * Lexical analysis: splits source text such as "(add 2 2)" into a flat
 * list of tokens.
 *
 * Recognised tokens:
 *   - { type: "paren",  value: "(" | ")" }
 *   - { type: "name",   value: <run of ASCII letters> }
 *   - { type: "number", value: <run of digits, kept as a string> }
 * Whitespace separates tokens and is discarded.
 *
 * @param {string} code - Source text to tokenize.
 * @returns {Array<{type: string, value: string}>} Tokens in source order.
 * @throws {TypeError} If a character is not part of any recognised token.
 */
function tokenizer(code) {
  const tokens = []; // tokens collected so far
  let current = 0; // index of the character being examined

  while (current < code.length) {
    let token = code[current];

    if (token === "(" || token === ")") {
      tokens.push({ type: "paren", value: token });
      current++;
      continue;
    }

    if (WHITESPACE.test(token)) {
      // Whitespace only separates tokens; skip it.
      current++;
      continue;
    }

    if (LETTERS.test(token)) {
      // Collect a run of letters, e.g. "add".
      let value = "";
      // The bounds check matters: past the end of the input `token` is
      // undefined, which RegExp#test coerces to the string "undefined" and
      // which would therefore match LETTERS forever.
      while (current < code.length && LETTERS.test(token)) {
        value += token;
        token = code[++current];
      }
      tokens.push({ type: "name", value });
      continue;
    }

    if (NUMBERS.test(token)) {
      // Collect a run of digits, e.g. "23".
      let value = "";
      while (current < code.length && NUMBERS.test(token)) {
        value += token;
        token = code[++current];
      }
      tokens.push({ type: "number", value });
      continue;
    }

    // Without this, an unrecognised character would never advance `current`
    // and the outer loop would spin forever.
    throw new TypeError(`Unexpected character "${token}" at index ${current}`);
  }

  return tokens;
}
// Demo: print the token list produced for the sample program "(add 23 22)".
console.log(tokenizer("(add 23 22)"));
  • 2). 语法分析:将 tokens 转换成 AST 语法树(parser.js)
/**
 * Syntax analysis: turns a flat token list into an AST.
 *
 * The root is always `{ type: "Program", body: [...] }`. Each "(" token
 * opens a `CallExpression` node whose `value` is the next token's value and
 * whose `params` are the nodes parsed up to the matching ")". Each "number"
 * token becomes a `NumericLiteral` node.
 *
 * @param {Array<{type: string, value: string}>} program - Tokens to parse.
 * @returns {{type: string, body: Array<object>}} The Program node.
 * @throws {TypeError} On a token that cannot start an expression, or when
 *   the input ends before a call expression is closed.
 */
function parser(program) {
  // Every syntax tree starts with a Program root.
  const newAst = {
    type: "Program",
    body: [],
  };
  let current = 0; // index of the next token to consume

  // Parses one expression starting at `current` and advances past it.
  function walk() {
    let token = program[current];

    if (token.type === "number") {
      current++;
      return { type: "NumericLiteral", value: token.value };
    }

    if (token.type === "paren" && token.value === "(") {
      // The token right after "(" names the call.
      token = program[++current];
      if (token === undefined) {
        throw new TypeError('Unexpected end of input after "("');
      }
      const node = { type: "CallExpression", value: token.value, params: [] };

      // Everything up to the matching ")" is a parameter; nested calls are
      // handled by the recursive walk.
      token = program[++current];
      while (
        token !== undefined &&
        !(token.type === "paren" && token.value === ")")
      ) {
        node.params.push(walk());
        token = program[current];
      }
      if (token === undefined) {
        throw new TypeError('Unterminated call expression: missing ")"');
      }
      current++; // consume the closing ")"
      return node;
    }

    // Returning nothing here would leave `current` untouched and make the
    // callers loop forever, so report the offending token instead.
    throw new TypeError(
      `Unexpected token ${token.type} "${token.value}" at position ${current}`
    );
  }

  while (current < program.length) {
    newAst.body.push(walk());
  }
  return newAst;
}

// Demo: tokenize the sample program, parse the tokens, and print the
// resulting AST as JSON.
const tokens = tokenizer("(add 23 22)");
const ast = parser(tokens)
console.log(JSON.stringify(ast));
  • 3). 遍历语法树 深度优先
// traverser.js

/**
 * Walks an AST depth-first, calling visitor callbacks for each node.
 *
 * A visitor entry is looked up by `node.type` and may be either a function
 * (called on entry) or an object with optional `enter` / `leave` functions.
 * `enter` runs before the node's children are visited and `leave` after.
 * Children are `body` for Program nodes and `params` for CallExpression
 * nodes; other node types are treated as leaves.
 *
 * @param {object} code - Root AST node to start from.
 * @param {object} [visitor={}] - Map from node type to callback(s). Each
 *   callback receives `(node, parent)`; the root's parent is `null`.
 */
function traverse(code, visitor = {}) {
  // The default parameter only covers `undefined`; tolerate an explicit null.
  const handlers = visitor === null ? {} : visitor;

  function traverseArray(nodes, parent) {
    nodes.forEach((child) => traverseNode(child, parent));
  }

  function traverseNode(node, parent) {
    const handler = handlers[node.type];
    let enter;
    let leave;
    if (typeof handler === "function") {
      enter = handler;
    } else if (handler !== null && typeof handler === "object") {
      // `typeof null` is "object", hence the explicit null check.
      enter = handler.enter;
      leave = handler.leave;
    }

    if (enter) enter(node, parent);
    switch (node.type) {
      case "Program":
        traverseArray(node.body, node);
        break;
      case "CallExpression":
        traverseArray(node.params, node);
        break;
      case "NumericLiteral":
        break;
    }
    if (leave) leave(node, parent);
  }

  traverseNode(code, null);
}

let tokens = tokenizer("(add 23 22)");
let ast = parser(tokens);

/*
 * Demo: log every enter/leave event for Program and CallExpression nodes.
 * No visitor is registered for NumericLiteral, so the expected output is:
 *
 *   enter program
 *   enter CallExpression
 *   leave CallExpression
 *   leave program
 */
traverse(ast, {
  Program: {
    enter: () => console.log("enter program"),
    leave: () => console.log("leave program"),
  },
  CallExpression: {
    enter: () => console.log("enter CallExpression"),
    leave: () => console.log("leave CallExpression"),
  },
});

  • 4). 转换ast 语法树 transformer.js
/**
 * Builds a new AST from the parser's AST by walking it with `traverse`.
 *
 * Source `CallExpression` nodes (`value` / `params`) become target
 * `CallExpression` nodes with a `callee` Identifier and an `arguments`
 * array; `NumericLiteral` nodes are copied.
 *
 * Side effect: every visited Program / CallExpression node of the input
 * gets a `_context` property pointing at the array in the new tree that
 * its children should be appended to.
 *
 * @param {object} astTree - Program node produced by the parser.
 * @returns {{type: string, body: Array<object>}} The new Program node.
 */
function transform(astTree) {
  const outputBody = [];
  // The visitor callbacks below receive only (node, parent), so the target
  // array for each parent is stashed on the parent node itself.
  astTree._context = outputBody;

  const visitor = {
    CallExpression: (node, parent) => {
      const args = [];
      node._context = args;
      parent._context.push({
        type: "CallExpression",
        callee: { type: "Identifier", name: node.value },
        arguments: args,
      });
    },
    NumericLiteral: (node, parent) => {
      const { value } = node;
      parent._context.push({ type: "NumericLiteral", value });
    },
  };
  traverse(astTree, visitor);

  return { type: "Program", body: outputBody };
}
// Demo: tokenize and parse the sample program, transform the AST, and print
// the transformed tree as JSON.
let tokens = tokenizer("(add 23 22)");
let ast = parser(tokens);
let newAst = transform(ast);
console.log(JSON.stringify(newAst));
  • 5). 代码生成 codeGen.js
/**
 * Code generation: renders a transformed AST node as source text.
 *
 * - Program: each body node rendered on its own line.
 * - CallExpression: `name(arg1,arg2,...)` using `callee.name` and the
 *   rendered `arguments`.
 * - NumericLiteral: the node's `value` as-is.
 *
 * @param {object} ast - Node of the transformed AST.
 * @returns {string} Generated code for the node.
 * @throws {TypeError} If the node type is not one of the above.
 */
function codeGen(ast) {
  switch (ast.type) {
    case "Program":
      return ast.body.map(codeGen).join("\n");
    case "CallExpression": {
      // `arguments` cannot be used as a binding name in strict mode (and ES
      // modules are always strict), so alias it while destructuring.
      const { callee, arguments: args } = ast;
      return `${callee.name}(${args.map(codeGen).join(",")})`;
    }
    case "NumericLiteral":
      return ast.value;
    default:
      throw new TypeError(`Unknown node type: ${ast.type}`);
  }
}
// Run the whole pipeline: tokenize -> parse -> transform -> generate code.
const tokens = tokenizer("(add 2 2)");
const ast = parser(tokens);
const newAst = transform(ast);
const finalCode = codeGen(newAst);
console.log(finalCode); // prints: add(2,2)