流程
- 词法和语法的定义
- 词法分析
- 语法分析
- 遍历AST,输出结果
参考原文 winter老师的重学前端
1. 词法和语法的定义
1.1词法定义
找出关键内容作为 Token,如这里的数字和运算符就是 Token,后续很多转化都是基于 Token 做处理。
Token
- Number: 1 2 3 4 5 6 7 8 9 0 以及小数点 . 的组合
- Operator: + 、-、 *、 /
- Whitespace: <sp>
- LineTerminator: <LF> <CR>
1.2语法的定义
几种结构的组合产生一个新的结构,也叫语法产生式。语法定义多数采用 BNF
1.2.1 总入口语法
Expression = 加法 + EOF
<!-- 总入口表达式-->
<Expression> ::= <AdditiveExpression><EOF>
1.2.2 加法语法
因为加减乘除有优先级,所以
- (加法) = (乘法)
- (加法) = (加法 + 乘法)
- (加法) = (加法 - 乘法) 可以是同时组合
<!-- 总入口表达式-->
<Expression> ::= <AdditiveExpression><EOF>
<!-- 加法语法 -->
<AdditiveExpression> ::=
<MultiplicativeExpression>
|<AdditiveExpression><+><MultiplicativeExpression>
|<AdditiveExpression><-><MultiplicativeExpression>
1.2.3 乘法语法
乘法的3种情况
- (乘法) = (Number)
- (乘法) = (乘法 * Number)
- (乘法) = (乘法 / Number)
<!-- 乘法语法 -->
<MultiplicativeExpression> ::=
<Number>
|<MultiplicativeExpression><*><Number>
|<MultiplicativeExpression></><Number>
2. 词法分析
通过状态机的方式,把输入内容转化为 token。
代码实现
var token = [];// Characters of the number currently being scanned (one run of consecutive digits / '.')
var tokens = [];// Every token emitted so far, in input order
/**
 * Initial lexer state: consumes one character that is not part of a number
 * already being scanned.
 *
 * - A digit or '.' starts a number: it is buffered in `token` and the lexer
 *   moves to the `inNumber` state.
 * - An operator (+ - * /) is emitted immediately as its own token.
 * - A space, '\r' or '\n' is skipped.
 *
 * @param {string|symbol} char - The next input character, or a non-string
 *   end-of-input sentinel (the driver passes a Symbol).
 * @returns {Function|undefined} The state that must consume the next
 *   character; `undefined` for the end-of-input sentinel.
 * @throws {SyntaxError} If `char` is a string the lexer does not recognise.
 */
const start = char => {
    switch (char) {
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        case '.':
            token.push(char);
            return inNumber;
        case '+':
        case '-':
        case '*':
        case '/':
            // The operator character doubles as the token type.
            emmitToken(char, char);
            return start;
        case ' ':
        case '\r':
        case '\n':
            return start;
        default:
            break;
    }
    // End-of-input sentinel: nothing to consume and no next state.
    if (typeof char !== 'string') {
        return undefined;
    }
    // Fail here with a clear message instead of returning undefined, which
    // would make the caller crash later with "state is not a function".
    throw new SyntaxError(`Unexpected character ${JSON.stringify(char)}`);
};
/**
 * Lexer state while scanning a number.
 *
 * Digits and '.' are appended to the `token` buffer. Any other input ends
 * the number: the buffered text is validated, emitted as a "Number" token,
 * and the terminating character is handed back to the `start` state.
 *
 * @param {string|symbol} char - The next input character, or the
 *   end-of-input sentinel.
 * @returns {Function|undefined} The state for the next character (whatever
 *   `start` returns once the number has ended).
 * @throws {SyntaxError} If the buffered text is not a valid decimal number,
 *   e.g. "1.2.3" or a lone ".".
 */
const inNumber = char => {
    switch (char) {
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        case '.':
            token.push(char);
            return inNumber;
        default:
            break;
    }
    const lexeme = token.join("");
    // Reset the buffer first so a rejected literal does not leak into the next number.
    token = [];
    // Allow "12", "12.", "12.5" and ".5"; reject lone dots and repeated dots,
    // which would otherwise evaluate to NaN later.
    if (!/^(\d+\.?\d*|\.\d+)$/.test(lexeme)) {
        throw new SyntaxError(`Invalid number literal "${lexeme}"`);
    }
    emmitToken("Number", lexeme);
    return start(char); // put back char
};
/**
 * Stores a token in the shared `tokens` list and echoes its value to the console.
 *
 * @param {string} type - Token type, e.g. "Number", an operator character, or "EOF".
 * @param {string} [value] - Token text; left undefined when the caller omits it.
 */
function emmitToken(type, value) {
    const entry = { type: type, value: value };
    console.log(entry.value);
    tokens.push(entry);
}
// Sample expression run through the lexer state machine.
var input = "3 * 4 + 2.1 * 2"
var state = start;
// Each state function consumes one character and returns the state for the next one.
for(var c of input.split('')) {
state = state(c);
}
// Feed an end-of-input sentinel so a number still being scanned gets emitted.
state(Symbol('EOF'))
emmitToken("EOF");
console.log(tokens)
// Number/operator values logged by emmitToken for the input above
// (the undefined value of the EOF token and the final tokens dump are not shown):
// 3
// *
// 4
// +
// 2.1
// *
// 2
3. 语法分析
通过递归遍历的方式,把词法分析的内容转化为语法分析内容。也就是AST抽象语法树。
// Main entry point for processing the lexer's token data (skeleton: body intentionally empty here)
function Expression(source){
}
// Handles addition, which includes recursively handling multiplication (skeleton: body intentionally empty here)
function AdditiveExpression( ){
}
// Handles multiplication (skeleton: body intentionally empty here)
function MultiplicativeExpression(){
}
1.数据流变化:
已知表达式 3 * 300 + 2 * 256 经过词法分析后,得到下面格式的数据:
[
{
"type": "Number",
"value": "3"
},
{
"type": "*",
"value": "*"
},
{
"type": "Number",
"value": "300"
},
{
"type": "+",
"value": "+"
},
{
"type": "Number",
"value": "2"
},
{
"type": "*",
"value": "*"
},
{
"type": "Number",
"value": "256"
},
{
"type": "EOF"
}
]
第一次转化,把 3 * 300 转化为 MultiplicativeExpression 嵌套 children 的数据格式
[
{
"type": "MultiplicativeExpression",
"operator": "*",
"children": [
{
"type": "MultiplicativeExpression",
"children": [
{
"type": "Number",
"value": "3"
}
]
},
{
"type": "*",
"value": "*"
},
{
"type": "Number",
"value": "300"
}
]
},
{
"type": "+",
"value": "+"
},
{
"type": "Number",
"value": "2"
},
{
"type": "*",
"value": "*"
},
{
"type": "Number",
"value": "256"
},
{
"type": "EOF"
}
]
同理,第二次转化,把 2 * 256 转化为 MultiplicativeExpression 嵌套 children 的数据格式
[
{
"type": "MultiplicativeExpression",
"operator": "*",
"children": [
{
"type": "MultiplicativeExpression",
"children": [
{
"type": "Number",
"value": "3"
}
]
},
{
"type": "*",
"value": "*"
},
{
"type": "Number",
"value": "300"
}
]
},
{
"type": "+",
"value": "+"
},
{
"type": "MultiplicativeExpression",
"operator": "*",
"children": [
{
"type": "MultiplicativeExpression",
"children": [
{
"type": "Number",
"value": "2"
}
]
},
{
"type": "*",
"value": "*"
},
{
"type": "Number",
"value": "256"
}
]
},
{
"type": "EOF"
}
]
最终,在最外层包装Expression类型节点,里面只包含两个节点
一个是:AdditiveExpression , 一个是EOF。
{
"type": "Expression",
"children": [
{
"type": "AdditiveExpression",
"operator": "+",
"children": [
{
"type": "AdditiveExpression",
"children": [
{
"type": "MultiplicativeExpression",
"operator": "*",
"children": [
{
"type": "MultiplicativeExpression",
"children": [
{
"type": "Number",
"value": "3"
}
]
},
{
"type": "*",
"value": "*"
},
{
"type": "Number",
"value": "300"
}
]
}
]
},
{
"type": "+",
"value": "+"
},
{
"type": "MultiplicativeExpression",
"operator": "*",
"children": [
{
"type": "MultiplicativeExpression",
"children": [
{
"type": "Number",
"value": "2"
}
]
},
{
"type": "*",
"value": "*"
},
{
"type": "Number",
"value": "256"
}
]
}
]
},
{
"type": "EOF"
}
]
}
2.代码实现
/**
 * Grammar entry point: <Expression> ::= <AdditiveExpression><EOF>.
 *
 * Reduces `source` in place until it starts with one AdditiveExpression
 * node followed by the EOF token, then replaces those two entries with a
 * single Expression node that holds them as children.
 *
 * @param {Array<Object>} source - Token / node list; mutated in place.
 * @returns {Object} The Expression node, which is also left at source[0].
 * @throws {SyntaxError} If the list is empty, or the reduced additive
 *   expression is not followed by an EOF token.
 */
function Expression(source){
    // True once the list starts with an AdditiveExpression followed by EOF.
    const isReduced = () =>
        Boolean(source[0]) && source[0].type === "AdditiveExpression"
        && Boolean(source[1]) && source[1].type === "EOF";

    if (!isReduced()) {
        if (source.length === 0) {
            throw new SyntaxError("Unexpected end of input: expected an expression");
        }
        AdditiveExpression(source);
        if (!isReduced()) {
            // Retrying would not change the list, so report the problem
            // instead of recursing until the stack overflows.
            const found = source[1] ? source[1].type : "end of input";
            throw new SyntaxError(`Expected EOF after expression but found ${found}`);
        }
    }

    const node = {
        type: "Expression",
        children: [source.shift(), source.shift()]
    };
    source.unshift(node);
    return node;
}
/**
 * Reduces the front of `source` into a single AdditiveExpression node:
 *
 *   <AdditiveExpression> ::= <MultiplicativeExpression>
 *       | <AdditiveExpression> + <MultiplicativeExpression>
 *       | <AdditiveExpression> - <MultiplicativeExpression>
 *
 * The list is rewritten in place; reduction stops when the leading
 * AdditiveExpression is no longer followed by "+" or "-".
 *
 * @param {Array<Object>} source - Token / node list; mutated in place.
 * @returns {Object} The AdditiveExpression node left at source[0].
 */
function AdditiveExpression(source){
    for (;;) {
        const head = source[0];
        const kind = head.type;

        // A bare multiplicative term is promoted to an additive expression.
        if (kind === "MultiplicativeExpression") {
            source[0] = {
                type: "AdditiveExpression",
                children: [head]
            };
            continue;
        }

        // Anything else must first be reduced to a multiplicative term.
        if (kind !== "AdditiveExpression") {
            MultiplicativeExpression(source);
            continue;
        }

        const following = source[1];
        const sign = following ? following.type : undefined;
        if (sign !== "+" && sign !== "-") {
            return head;
        }

        // Fold "<additive> <sign> <multiplicative>" into one node: take the
        // left side and operator, reduce the right side, then take that too.
        const left = source.shift();
        const operatorToken = source.shift();
        MultiplicativeExpression(source);
        const right = source.shift();
        source.unshift({
            type: "AdditiveExpression",
            operator: sign,
            children: [left, operatorToken, right]
        });
    }
}
/**
 * Reduces the front of `source` into a single MultiplicativeExpression node:
 *
 *   <MultiplicativeExpression> ::= <Number>
 *       | <MultiplicativeExpression> * <Number>
 *       | <MultiplicativeExpression> / <Number>
 *
 * The list is rewritten in place; reduction stops when the leading node is
 * no longer followed by "*" or "/".
 *
 * @param {Array<Object>} source - Token / node list; mutated in place.
 * @returns {Object} The MultiplicativeExpression node left at source[0].
 * @throws {SyntaxError} If the list is empty, does not start with a Number
 *   (or an already reduced MultiplicativeExpression), or an operator is not
 *   followed by a Number.
 */
function MultiplicativeExpression(source){
    const first = source[0];
    if (!first) {
        throw new SyntaxError("Unexpected end of input: expected a Number");
    }
    if (first.type === "Number") {
        // A lone number is the simplest multiplicative expression.
        source[0] = {
            type: "MultiplicativeExpression",
            children: [first]
        };
    } else if (first.type !== "MultiplicativeExpression") {
        // Nothing here can be reduced; fail instead of recursing forever.
        throw new SyntaxError(`Expected a Number but found ${first.type}`);
    }

    for (;;) {
        const operatorToken = source[1];
        if (!operatorToken || (operatorToken.type !== "*" && operatorToken.type !== "/")) {
            return source[0];
        }
        // Validate before mutating so a failed parse leaves `source` untouched.
        const operand = source[2];
        if (!operand || operand.type !== "Number") {
            const found = operand ? operand.type : "end of input";
            throw new SyntaxError(
                `Expected a Number after "${operatorToken.type}" but found ${found}`
            );
        }
        // Fold "<multiplicative> <op> <Number>" into one node at the front.
        const node = {
            type: "MultiplicativeExpression",
            operator: operatorToken.type,
            children: source.splice(0, 3)
        };
        source.unshift(node);
    }
}
测试代码
// Test input: the token list for 3 * 300 + 2 * 256, terminated by an EOF token
var source = [{
type:"Number",
value: "3"
}, {
type:"*",
value: "*"
}, {
type:"Number",
value: "300"
}, {
type:"+",
value: "+"
}, {
type:"Number",
value: "2"
}, {
type:"*",
value: "*"
}, {
type:"Number",
value: "256"
}, {
type:"EOF"
}];
var ast = Expression(source); // Produces the abstract syntax tree
// console.log(ast);
// 3 * 300 + 2 * 256
// console.log(ast.children[0].children);
// console.log(ast.children[0].children[0].children[0].children);
// console.log(ast.children[0].children[0].children[0].children[0].children);
console.log( JSON.stringify(ast))
4. 解析执行
遍历整棵AST,并输出结果
代码实现
/**
 * Walks an AST node recursively and computes its numeric value.
 *
 * Binary nodes keep their operands at children[0] and children[2]
 * (children[1] is the operator token); nodes without an operator just
 * forward to their first child.
 *
 * @param {Object} node - An Expression, AdditiveExpression,
 *   MultiplicativeExpression or Number node.
 * @returns {number|undefined} The computed value, or undefined for any
 *   other node type.
 */
function evaluate(node) {
    const { type, operator, children } = node;

    switch (type) {
        case "Expression":
            return evaluate(children[0]);

        case "AdditiveExpression":
            switch (operator) {
                case '-':
                    return evaluate(children[0]) - evaluate(children[2]);
                case '+':
                    return evaluate(children[0]) + evaluate(children[2]);
                default:
                    return evaluate(children[0]);
            }

        case "MultiplicativeExpression":
            switch (operator) {
                case '*':
                    return evaluate(children[0]) * evaluate(children[2]);
                case '/':
                    return evaluate(children[0]) / evaluate(children[2]);
                default:
                    return evaluate(children[0]);
            }

        case "Number":
            return Number(node.value);

        default:
            return undefined;
    }
}