前端开发中也经常会用到编译原理中的知识,我们经常会听到 compiler、AST、parse 等概念,很多项目也应用了这些概念,比如 Vue 的 SFC、React 常用的 JSX、Babel 到 ES5 的转换、V8 引擎的实现等等。
刚开始就直接硬撸源码肯定是一件很难的事情,我们需要一个简单的入门项目,先了解基本概念,才能更好地上手,否则大概率会被劝退。
这里我介绍 GitHub 上一个高 star 的、带你入门编译原理的库 the-super-tiny-compiler(Babel 推荐的一个入门库)。
Most compilers break down into three primary stages: Parsing, Transformation, and Code Generation.
| parse | 经历词法分析、语法分析等过程,最终得到抽象语法树 | |
|---|---|---|
| transformation | 经历遍历,访问者模式,把抽象语法树转换成想要的抽象语法树 | |
| code generation | 抽象语法树 --> 最终代码 | |
// Talk is cheap. Show me the code.
// 第一阶段词法分析(Lexical Analysis) raw code ----> tokens
// 第二阶段语法分析(Syntactic Analysis) tokens------> Abstract Syntax Tree(ast)
// tokenizer 依次遍历字符串中所有内容
// source
// (add 2 (subtract 4 2))
// dest
// [
// { type: 'paren', value: '(' },
// { type: 'name', value: 'add' },
// { type: 'number', value: '2' },
// { type: 'paren', value: '(' },
// { type: 'name', value: 'subtract' },
// { type: 'number', value: '4' },
// { type: 'number', value: '2' },
// { type: 'paren', value: ')' },
// { type: 'paren', value: ')' }
// ]
/**
 * Lexical analysis: splits the source text into a flat list of tokens.
 *
 * Recognised tokens:
 * - `paren`  : a single `(` or `)`
 * - `number` : a run of ASCII digits
 * - `string` : the raw characters between two double quotes (no escape handling)
 * - `name`   : a run of ASCII letters
 *
 * Whitespace between tokens is skipped.
 *
 * @param input - Source text, e.g. `(add 2 (subtract 4 2))`.
 * @returns The tokens in source order; every token's `value` is a string.
 * @throws TypeError if an unrecognised character is found or a string literal
 *   is missing its closing quote.
 */
function tokenizer(input: string) {
  // Hoisted so the patterns are not rebuilt for every character.
  const WHITESPACE = /\s/;
  const NUMBERS = /[0-9]/;
  const LETTERS = /[a-z]/i;

  let current = 0;
  const tokens: token[] = [];

  while (current < input.length) {
    const char = input[current];

    // Parentheses are single-character tokens.
    if (char === '(' || char === ')') {
      tokens.push({ type: 'paren', value: char });
      current++;
      continue;
    }

    // Whitespace only separates tokens; it produces none.
    if (WHITESPACE.test(char)) {
      current++;
      continue;
    }

    // Number: consume consecutive digits.
    if (NUMBERS.test(char)) {
      let value = '';
      while (current < input.length && NUMBERS.test(input[current])) {
        value += input[current++];
      }
      tokens.push({ type: 'number', value });
      continue;
    }

    // String: everything up to the next double quote.
    if (char === '"') {
      const closing = input.indexOf('"', current + 1);
      if (closing === -1) {
        // Without this check the scan would run past the end of the input forever.
        throw new TypeError(
          'Unterminated string literal starting at index ' + current,
        );
      }
      tokens.push({ type: 'string', value: input.slice(current + 1, closing) });
      current = closing + 1;
      continue;
    }

    // Name: consume consecutive letters.
    if (LETTERS.test(char)) {
      let value = '';
      // The explicit bounds check matters: RegExp.test(undefined) tests the
      // string "undefined", which matches LETTERS and would never terminate.
      while (current < input.length && LETTERS.test(input[current])) {
        value += input[current++];
      }
      tokens.push({ type: 'name', value });
      continue;
    }

    throw new TypeError('I dont know what this character is: ' + char);
  }

  return tokens;
}
// [
// { type: 'paren', value: '(' },
// { type: 'name', value: 'add' },
// { type: 'number', value: '2' },
// { type: 'paren', value: '(' },
// { type: 'name', value: 'subtract' },
// { type: 'number', value: '4' },
// { type: 'number', value: '2' },
// { type: 'paren', value: ')' },
// { type: 'paren', value: ')' }
// ]
//{
// "type": "Program",
// "body": [
// {
// "type": "CallExpression",
// "name": "add",
// "params": [
// {
// "type": "NumberLiteral",
// "value": "2"
// },
// {
// "type": "CallExpression",
// "name": "subtract",
// "params": [
// {
// "type": "NumberLiteral",
// "value": "4"
// },
// {
// "type": "NumberLiteral",
// "value": "2"
// }
// ]
// }
// ]
// }
// ]
//}
/**
 * Syntactic analysis: turns the flat token list into an AST.
 *
 * The result is a `Program` node whose `body` holds one node per top-level
 * expression. Node kinds produced:
 * - `NumberLiteral` / `StringLiteral` with the token's `value`
 * - `CallExpression` with `name` (the token following `(`) and `params`
 *   (the nodes parsed up to the matching `)`)
 *
 * @param tokens - Tokens with `type` and `value` fields, as produced by the tokenizer.
 * @returns The root `Program` node.
 * @throws TypeError if the tokens end before a call expression is closed, or
 *   if a token cannot start an expression (message is the token type).
 */
function parser(tokens) {
  let current = 0;

  // Returns the token at the cursor, failing loudly when the input is exhausted.
  function peek() {
    const token = tokens[current];
    if (token == null) {
      throw new TypeError('Unexpected end of input');
    }
    return token;
  }

  // Parses exactly one expression starting at the cursor and advances past it.
  function walk() {
    let token = peek();

    if (token.type === 'number') {
      current++;
      return {
        type: 'NumberLiteral',
        value: token.value,
      };
    }

    if (token.type === 'string') {
      current++;
      return {
        type: 'StringLiteral',
        value: token.value,
      };
    }

    // Call expression: "(" name param* ")"
    if (token.type === 'paren' && token.value === '(') {
      // Skip "("; the next token supplies the callee name.
      current++;
      token = peek();
      const node = {
        type: 'CallExpression',
        name: token.value,
        params: [],
      };

      // Skip the name, then collect parameters until the closing ")".
      current++;
      token = peek();
      while (!(token.type === 'paren' && token.value === ')')) {
        node.params.push(walk());
        token = peek();
      }

      // Skip ")".
      current++;
      return node;
    }

    throw new TypeError(token.type);
  }

  const ast = {
    type: 'Program',
    body: [],
  };

  while (current < tokens.length) {
    ast.body.push(walk());
  }

  return ast;
}
// 利用访问者模式,通过 enter / exit 钩子对 AST 进行操作(写过 Babel plugin 的话应该很熟悉)
/**
 * Depth-first walk over the AST that invokes visitor hooks.
 *
 * For each node, `visitor[node.type].enter(node, parent)` runs before the
 * node's children are visited and `visitor[node.type].exit(node, parent)`
 * runs afterwards; both hooks are optional. The root is visited with a
 * `null` parent.
 *
 * @param ast - Root node to start from.
 * @param visitor - Object keyed by node type, each entry optionally holding `enter` / `exit`.
 * @throws TypeError if a node's type is not Program, CallExpression,
 *   NumberLiteral or StringLiteral (message is the node type).
 */
function traverser(ast, visitor) {
  const visit = (current, owner) => {
    const hooks = visitor[current.type];

    // Pre-order hook.
    if (hooks && hooks.enter) {
      hooks.enter(current, owner);
    }

    // Descend into whichever property holds this node's children.
    const kind = current.type;
    if (kind === 'Program') {
      current.body.forEach(child => {
        visit(child, current);
      });
    } else if (kind === 'CallExpression') {
      current.params.forEach(child => {
        visit(child, current);
      });
    } else if (kind !== 'NumberLiteral' && kind !== 'StringLiteral') {
      // Literals are leaves; anything else is an unknown node kind.
      throw new TypeError(current.type);
    }

    // Post-order hook.
    if (hooks && hooks.exit) {
      hooks.exit(current, owner);
    }
  };

  visit(ast, null);
}
// 通过 _context 属性,可以从旧 AST 的父节点找到新 AST 中对应的数组,从而把转换后的节点放到正确的位置
/**
*
* ----------------------------------------------------------------------------
* Original AST | Transformed AST
* ----------------------------------------------------------------------------
* { | {
* type: 'Program', | type: 'Program',
* body: [{ | body: [{
* type: 'CallExpression', | type: 'ExpressionStatement',
* name: 'add', | expression: {
* params: [{ | type: 'CallExpression',
* type: 'NumberLiteral', | callee: {
* value: '2' | type: 'Identifier',
* }, { | name: 'add'
* type: 'CallExpression', | },
* name: 'subtract', | arguments: [{
* params: [{ | type: 'NumberLiteral',
* type: 'NumberLiteral', | value: '2'
* value: '4' | }, {
* }, { | type: 'CallExpression',
* type: 'NumberLiteral', | callee: {
* value: '2' | type: 'Identifier',
* }] | name: 'subtract'
* }] | },
* }] | arguments: [{
* } | type: 'NumberLiteral',
* | value: '4'
* ---------------------------------- | }, {
* | type: 'NumberLiteral',
* | value: '2'
* | }]
* (sorry the other one is longer.) | }
* | }
* | }]
* | }
* ----------------------------------------------------------------------------
*/
/**
 * Builds a new AST from the parser's AST.
 *
 * `CallExpression { name, params }` nodes become
 * `CallExpression { callee: Identifier, arguments }` nodes, and a call whose
 * parent is not itself a `CallExpression` is wrapped in an
 * `ExpressionStatement`. Number and string literals are copied as-is.
 *
 * Note: the input tree is mutated. A `_context` property is attached to the
 * root and to every call node; it points at the array in the new tree that
 * should receive that node's transformed children.
 *
 * @param ast - Root `Program` node of the original tree.
 * @returns The root `Program` node of the new tree.
 */
function transformer(ast) {
  const newAst = {
    type: 'Program',
    body: [],
  };

  // Children of the old root are emitted into the new root's body.
  ast._context = newAst.body;

  traverser(ast, {
    NumberLiteral: {
      enter(node, parent) {
        parent._context.push({
          type: 'NumberLiteral',
          value: node.value,
        });
      },
    },
    StringLiteral: {
      enter(node, parent) {
        parent._context.push({
          type: 'StringLiteral',
          value: node.value,
        });
      },
    },
    CallExpression: {
      enter(node, parent) {
        const callExpression = {
          type: 'CallExpression',
          callee: {
            type: 'Identifier',
            name: node.name,
          },
          arguments: [],
        };

        // Nested params visited later are pushed into this call's arguments.
        node._context = callExpression.arguments;

        // A call that is not an argument of another call is a statement.
        // A separate binding is used because the wrapper has a different
        // shape and cannot be assigned back to `callExpression`.
        const emitted =
          parent.type !== 'CallExpression'
            ? { type: 'ExpressionStatement', expression: callExpression }
            : callExpression;

        parent._context.push(emitted);
      },
    },
  });

  return newAst;
}
// 最后一步 代码生成 把ast按照自定义的语法规则转译成对应的目标代码
/**
 * Recursively prints a transformed AST node as target source text.
 *
 * - `Program`: each body node on its own line
 * - `ExpressionStatement`: the expression followed by `;`
 * - `CallExpression`: `callee(arg1, arg2, ...)`
 * - `Identifier`: its name
 * - `NumberLiteral`: its value
 * - `StringLiteral`: its value wrapped in double quotes (no escaping)
 *
 * @param node - The node to print.
 * @returns The generated code for `node`.
 * @throws TypeError if the node type is not one of the kinds above
 *   (message is the node type).
 */
function codeGenerator(node) {
  const kind = node.type;

  if (kind === 'Program') {
    const statements = node.body.map(codeGenerator);
    return statements.join('\n');
  }

  if (kind === 'ExpressionStatement') {
    const printed = codeGenerator(node.expression);
    return printed + ';';
  }

  if (kind === 'CallExpression') {
    const callee = codeGenerator(node.callee);
    const args = node.arguments.map(codeGenerator).join(', ');
    return callee + '(' + args + ')';
  }

  if (kind === 'Identifier') {
    return node.name;
  }

  if (kind === 'NumberLiteral') {
    return node.value;
  }

  if (kind === 'StringLiteral') {
    return '"' + node.value + '"';
  }

  throw new TypeError(node.type);
}