JS 四则运算解析器实现

460 阅读4分钟

流程

  • 词法和语法的定义
  • 词法分析
  • 语法分析
  • 遍历AST,输出结果

参考原文 winter老师的重学前端

1. 词法和语法的定义

1.1词法定义

找出关键内容做Token,如这里的数字+ 运算符就是Token,后续很多转化就是基于token做处理。

Token

  • Number: 由 0–9 的数字组合而成(下文的词法分析实现还允许包含小数点 .)
  • Operator: + 、-、 *、 /

Whitespace:<sp>

LineTerminator:<LF><CR>

1.2语法的定义

几种结构的组合产生一个新的结构,也叫语法产生式。语法定义多数采用 BNF

1.2.1 总入口语法

Expression = 加法 + EOF

<!-- 总入口表达式-->
<Expression> ::= <AdditiveExpression><EOF>

1.2.2 加法语法

因为加减乘除有优先级,所以

  • (加法) = (乘法)
  • (加法) = (加法 + 乘法)
  • (加法) = (加法 - 乘法),以上几种形式可以组合出现
<!-- 总入口表达式-->
<Expression> ::= <AdditiveExpression><EOF>
<!-- 加法语法 -->
<AdditiveExpression> ::= 
    <MultiplicativeExpression>
    |<AdditiveExpression><+><MultiplicativeExpression>
    |<AdditiveExpression><-><MultiplicativeExpression>

1.2.3 乘法语法

乘法的3种情况

  • (乘法) = (Number)
  • (乘法) = (乘法 * Number)
  • (乘法) = (乘法 / Number)
 
<!-- 乘法语法 -->
<MultiplicativeExpression> ::= 
    <Number>
    |<MultiplicativeExpression><*><Number>
    |<MultiplicativeExpression></><Number>   

2. 词法分析

通过状态机的方式,实现内容转化token。

代码实现


var token = [];// Characters of the number literal currently being scanned
var tokens = [];// Every token emitted so far, in input order
/**
 * Lexer state: between tokens.
 *
 * Consumes one character and returns the state function that must handle
 * the next one.
 *
 * @param {string|symbol} char - A single input character, or the Symbol
 *   end-of-input sentinel passed by the driver.
 * @returns {Function} `inNumber` after a digit or '.', otherwise `start`.
 * @throws {SyntaxError} If the character is not part of the token grammar.
 */
const start = char => {
    switch (char) {
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        case '.':
            // First character of a number literal: buffer it and switch state.
            token.push(char);
            return inNumber;
        case '+':
        case '-':
        case '*':
        case '/':
            // Operators are single-character tokens whose type equals their text.
            emmitToken(char, char);
            return start;
        case ' ':
        case '\r':
        case '\n':
            // Whitespace and line terminators separate tokens and are skipped.
            return start;
    }
    if (typeof char === 'symbol') {
        // End-of-input sentinel: nothing is pending in this state.
        return start;
    }
    // Previously this fell through and returned undefined, which made the
    // driver fail later with "state is not a function". Fail here instead,
    // naming the offending character.
    throw new SyntaxError(`Unexpected character ${JSON.stringify(char)}`);
}
/**
 * Lexer state: inside a number literal.
 *
 * Digits and '.' are appended to the pending `token` buffer. Any other
 * character ends the literal: the buffered text is emitted as a "Number"
 * token and the character is handed back to `start` for processing.
 *
 * @param {string|symbol} char - A single input character, or the Symbol
 *   end-of-input sentinel passed by the driver.
 * @returns {Function} The state function for the next character.
 * @throws {SyntaxError} If the buffered text is not a valid number
 *   (for example "1.2.3" or a lone ".").
 */
const inNumber = char => {
    switch (char) {
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        case '.':
            token.push(char);
            return inNumber;
    }
    const lexeme = token.join("");
    token = [];
    // The buffer only ever holds digits and dots, so a NaN conversion means
    // the literal is malformed. Reject it here rather than letting NaN
    // surface at evaluation time.
    if (Number.isNaN(Number(lexeme))) {
        throw new SyntaxError(`Malformed number literal "${lexeme}"`);
    }
    emmitToken("Number", lexeme);
    return start(char); // put back char
}
/**
 * Records one token: logs its value, then appends it to `tokens`.
 *
 * @param {string} type - Token type ("Number", an operator character, "EOF").
 * @param {string} [value] - Token text; omitted for the EOF token.
 */
function emmitToken(type, value) {
    const entry = { type: type, value: value };
    console.log(entry.value);
    tokens.push(entry);
}

// Drive the state machine over a sample expression: each state function
// consumes one character and returns the state for the next character.
var input = "3 * 4 + 2.1 * 2"
var state = input.split('').reduce(function (current, ch) {
    return current(ch);
}, start);
// A non-character sentinel makes a pending number flush itself.
state(Symbol('EOF'))
emmitToken("EOF");
console.log(tokens)
// Values logged by emmitToken for the tokens of this input:
// 3
// *
// 4
// +
// 2.1
// *
// 2

3. 语法分析

通过递归遍历的方式,把词法分析的内容转化为语法分析内容。也就是AST抽象语法树。

// Outline only: top-level entry point that receives the lexer's token data (body intentionally left empty here).
function Expression(source){

}
// Outline only: handles addition/subtraction, recursing into the multiplicative rule (body intentionally left empty here).
function AdditiveExpression( ){

}
// Outline only: handles multiplication/division (body intentionally left empty here).
function MultiplicativeExpression(){
    
}

1.数据流变化:

已知表达式 3 * 300 + 2 * 256 经过词法分析后,会得到下面格式的 token 数据:

[
    {
        "type": "Number",
        "value": "3"
    },
    {
        "type": "*",
        "value": "*"
    },
    {
        "type": "Number",
        "value": "300"
    },
    {
        "type": "+",
        "value": "+"
    },
    {
        "type": "Number",
        "value": "2"
    },
    {
        "type": "*",
        "value": "*"
    },
    {
        "type": "Number",
        "value": "256"
    },
    {
        "type": "EOF"
    }
]

第一次转化,把 3 * 300 转化为 MultiplicativeExpression 嵌套 children 的数据格式

[
    {
        "type": "MultiplicativeExpression",
        "operator": "*",
        "children": [
            {
                "type": "MultiplicativeExpression",
                "children": [
                    {
                        "type": "Number",
                        "value": "3"
                    }
                ]
            },
            {
                "type": "*",
                "value": "*"
            },
            {
                "type": "Number",
                "value": "300"
            }
        ]
    },
    {
        "type": "+",
        "value": "+"
    },
    {
        "type": "Number",
        "value": "2"
    },
    {
        "type": "*",
        "value": "*"
    },
    {
        "type": "Number",
        "value": "256"
    },
    {
        "type": "EOF"
    }
]

同理,第二次转化,把 2 * 256 转化为 MultiplicativeExpression 嵌套 children 的数据格式

[
    {
        "type": "MultiplicativeExpression",
        "operator": "*",
        "children": [
            {
                "type": "MultiplicativeExpression",
                "children": [
                    {
                        "type": "Number",
                        "value": "3"
                    }
                ]
            },
            {
                "type": "*",
                "value": "*"
            },
            {
                "type": "Number",
                "value": "300"
            }
        ]
    },
    {
        "type": "+",
        "value": "+"
    },
    {
        "type": "MultiplicativeExpression",
        "operator": "*",
        "children": [
            {
                "type": "MultiplicativeExpression",
                "children": [
                    {
                        "type": "Number",
                        "value": "2"
                    }
                ]
            },
            {
                "type": "*",
                "value": "*"
            },
            {
                "type": "Number",
                "value": "256"
            }
        ]
    },
    {
        "type": "EOF"
    }
]

最终,在最外层包装Expression类型节点,里面只包含两个节点

一个是:AdditiveExpression , 一个是EOF。

{
    "type": "Expression",
    "children": [
        {
            "type": "AdditiveExpression",
            "operator": "+",
            "children": [
                {
                    "type": "AdditiveExpression",
                    "children": [
                        {
                            "type": "MultiplicativeExpression",
                            "operator": "*",
                            "children": [
                                {
                                    "type": "MultiplicativeExpression",
                                    "children": [
                                        {
                                            "type": "Number",
                                            "value": "3"
                                        }
                                    ]
                                },
                                {
                                    "type": "*",
                                    "value": "*"
                                },
                                {
                                    "type": "Number",
                                    "value": "300"
                                }
                            ]
                        }
                    ]
                },
                {
                    "type": "+",
                    "value": "+"
                },
                {
                    "type": "MultiplicativeExpression",
                    "operator": "*",
                    "children": [
                        {
                            "type": "MultiplicativeExpression",
                            "children": [
                                {
                                    "type": "Number",
                                    "value": "2"
                                }
                            ]
                        },
                        {
                            "type": "*",
                            "value": "*"
                        },
                        {
                            "type": "Number",
                            "value": "256"
                        }
                    ]
                }
            ]
        },
        {
            "type": "EOF"
        }
    ]
}

2.代码实现

/**
 * Entry rule: <Expression> ::= <AdditiveExpression><EOF>.
 *
 * Reduces the front of `source` until it holds exactly an
 * AdditiveExpression node followed by the EOF token, then replaces those
 * two entries with a single Expression node.
 *
 * @param {Array<Object>} source - Token/node list; mutated in place.
 * @returns {Object} The Expression node, which is also left at source[0].
 * @throws {SyntaxError} If the list is empty, or if tokens other than EOF
 *   remain after the additive expression (for example "3 4" or a missing
 *   EOF). The earlier version recursed without bound in those cases.
 */
function Expression(source){
    if (!source || source.length === 0) {
        throw new SyntaxError("Expression: expected a non-empty token list");
    }
    // True once the list starts with a fully reduced expression and EOF.
    const isComplete = () =>
        source[0].type === "AdditiveExpression" && Boolean(source[1]) && source[1].type === "EOF";

    if (!isComplete()) {
        // One call is enough: AdditiveExpression keeps reducing until
        // source[0] is an AdditiveExpression not followed by '+' or '-'.
        AdditiveExpression(source);
    }
    if (!isComplete()) {
        const stray = source[1];
        const found = stray ? `token "${stray.type}"` : "end of input";
        throw new SyntaxError(`Unexpected ${found} after expression, expected EOF`);
    }
    const node = {
        type: "Expression",
        children: [source.shift(), source.shift()]
    };
    source.unshift(node);
    return node;
}
/**
 * Additive rule:
 *   <AdditiveExpression> ::= <MultiplicativeExpression>
 *                          | <AdditiveExpression> + <MultiplicativeExpression>
 *                          | <AdditiveExpression> - <MultiplicativeExpression>
 *
 * Repeatedly rewrites the front of `source` in place until source[0] is an
 * AdditiveExpression node that is not followed by '+' or '-'.
 *
 * @param {Array<Object>} source - Token/node list; mutated in place.
 * @returns {Object} The AdditiveExpression node left at source[0].
 */
function AdditiveExpression(source){
    for (;;) {
        const head = source[0];

        // A finished multiplicative node is promoted to an additive node.
        if (head.type === "MultiplicativeExpression") {
            source[0] = {
                type:"AdditiveExpression",
                children:[head]
            };
            continue;
        }

        // Anything else must first be reduced by the multiplicative rule.
        if (head.type !== "AdditiveExpression") {
            MultiplicativeExpression(source);
            continue;
        }

        const sign = source[1] && source[1].type;
        if (sign !== "+" && sign !== "-") {
            return head;
        }

        // Fold "<additive> <sign> <multiplicative>" into one node: take the
        // left operand and the operator, reduce the right operand, take it.
        const left = source.shift();
        const operatorToken = source.shift();
        MultiplicativeExpression(source);
        const right = source.shift();
        source.unshift({
            type:"AdditiveExpression",
            operator:sign,
            children:[left, operatorToken, right]
        });
    }
}
/**
 * Multiplicative rule:
 *   <MultiplicativeExpression> ::= <Number>
 *                                | <MultiplicativeExpression> * <Number>
 *                                | <MultiplicativeExpression> / <Number>
 *
 * Rewrites the front of `source` in place until source[0] is a
 * MultiplicativeExpression node that is not followed by '*' or '/'.
 * Chains such as a / b / c are folded left-associatively.
 *
 * @param {Array<Object>} source - Token/node list; mutated in place.
 * @returns {Object} The MultiplicativeExpression node left at source[0].
 * @throws {SyntaxError} If the list does not start with a Number (or an
 *   already reduced MultiplicativeExpression), or if '*' or '/' is not
 *   followed by a Number. The earlier version recursed forever on an
 *   unexpected first token and consumed any token as the right operand.
 */
function MultiplicativeExpression(source){
    const head = source[0];
    if (!head) {
        throw new SyntaxError("Unexpected end of input, expected a Number");
    }
    if (head.type === "Number") {
        // A bare number is the base case of the rule.
        source[0] = {
            type:"MultiplicativeExpression",
            children:[head]
        };
    } else if (head.type !== "MultiplicativeExpression") {
        throw new SyntaxError(`Unexpected token "${head.type}", expected a Number`);
    }

    while (source[1] && (source[1].type === "*" || source[1].type === "/")) {
        const operand = source[2];
        // Validate before touching the list so a failure leaves it intact.
        if (!operand || operand.type !== "Number") {
            const found = operand ? `token "${operand.type}"` : "end of input";
            throw new SyntaxError(
                `Unexpected ${found} after "${source[1].type}", expected a Number`
            );
        }
        // Replace "<multiplicative> <op> <Number>" with one combined node.
        const [left, operatorToken, right] = source.splice(0, 3);
        source.unshift({
            type:"MultiplicativeExpression",
            operator:operatorToken.type,
            children:[left, operatorToken, right]
        });
    }
    return source[0];
}

测试代码

// Test driver: the token stream for "3 * 300 + 2 * 256", written out by hand
// in the same shape the lexer produces, then parsed into an AST.
var source = ["3", "*", "300", "+", "2", "*", "256"].map(function (lexeme) {
    // Lexemes starting with a digit are numbers; every other lexeme is an
    // operator whose type equals its text.
    return /^[0-9]/.test(lexeme)
        ? { type:"Number", value: lexeme }
        : { type: lexeme, value: lexeme };
});
source.push({ type:"EOF" });

var ast = Expression(source); // the abstract syntax tree

// Print the whole tree; drill into ast.children[0].children and deeper to
// inspect individual levels.
console.log( JSON.stringify(ast))

4. 解析执行

遍历整棵AST,并输出结果

代码实现

/**
 * Recursively evaluates an AST node produced by the parser.
 *
 * Binary nodes keep their operands at children[0] and children[2], with
 * the operator token in between. Nodes without an operator wrap a single
 * child.
 *
 * @param {Object} node - AST node: "Expression", "AdditiveExpression",
 *   "MultiplicativeExpression" or "Number".
 * @returns {number|undefined} The numeric value, or undefined for any
 *   other node type.
 */
function evaluate(node) {
    const { type, operator, children } = node;
    switch (type) {
        case "Number":
            return Number(node.value);
        case "Expression":
            return evaluate(children[0]);
        case "AdditiveExpression": {
            const left = evaluate(children[0]);
            if (operator === '-') {
                return left - evaluate(children[2]);
            }
            if (operator === '+') {
                return left + evaluate(children[2]);
            }
            return left;
        }
        case "MultiplicativeExpression": {
            const left = evaluate(children[0]);
            if (operator === '*') {
                return left * evaluate(children[2]);
            }
            if (operator === '/') {
                return left / evaluate(children[2]);
            }
            return left;
        }
        default:
            return undefined;
    }
}