# 编译[2]执行一行四则运算

1,408 阅读18分钟

## Lex

`Lex`定义token匹配规则，完整代码文件在这里

``````
%{
......
#include "nl.h"
#include "y.tab.h"
......
%}

%%
"+" return ADD; /* without this rule '+' would fall through to the catch-all error rule at the end */
"-" return SUB;
"*" return MUL;
"/" return DIV;
"%" return MOD;
"(" return LP;
")" return RP;
"\n" return CR;
([1-9][0-9]*)|"0" {
    /* Integer literal: a lone 0, or a non-zero digit followed by more digits. */
    Expression *expression = nl_alloc_expression(INT_EXPRESSION);
    sscanf(yytext, "%d", &expression->u.int_value);
    /* Hand the node to the parser through yylval. */
    yylval.expression = expression;
    return INT_LITERAL;
}
[0-9]+\.[0-9]+ {
    /* Floating-point literal: digits, a dot, then digits. */
    Expression *expression = nl_alloc_expression(DOUBLE_EXPRESSION);
    sscanf(yytext, "%lf", &expression->u.double_value);
    yylval.expression = expression;
    return DOUBLE_LITERAL;
}
[ \t] ; /* blanks and tabs are skipped */
. {
    /* Any character not matched by the rules above is a lexical error. */
    printf("lexical error with unexpected charactor %s\n", yytext);
    exit(1);
}
%%
``````

``````// Integer pattern: either a lone 0, or a non-zero digit followed by any digits 0-9
([1-9][0-9]*)|"0" {
// The Expression type and its allocator are defined in another C file; the argument says an integer expression is being created
Expression *expression = nl_alloc_expression(INT_EXPRESSION);
// The matched text is available in the external variable yytext; convert it to an int and store it as the expression's value
sscanf(yytext, "%d", &expression->u.int_value);
// Stash the new expression in yylval so the YACC side can pick it up at run time
yylval.expression = expression;
// Return the token type, which the YACC grammar rules match on
return INT_LITERAL;
}
``````

``````[ \t] ;
``````

## 类型定义

``````/* Tag that tells which member of an NL_Value's union holds the value. */
typedef enum {
INT_VALUE = 1, /* the value is stored in u.int_value */
DOUBLE_VAULE /* the value is stored in u.double_value; "VAULE" (sic) is the spelling used everywhere in this code */
} ValueType;

/* Result of evaluating an expression: a type tag plus the value itself. */
typedef struct {
ValueType type; /* selects the active member of u */
union {
int int_value; /* used when type == INT_VALUE */
double double_value; /* used when type == DOUBLE_VAULE */
} u;
} NL_Value;
``````

``````/*
 * Kinds of expression node.
 *
 * INT_EXPRESSION and DOUBLE_EXPRESSION are literal nodes that carry a value;
 * the remaining kinds name the binary operators. ADD_EXPRESSION is required
 * for the "+" operator that the grammar's ADD token produces; it sits next to
 * the other operator kinds. EXPRESSION_TYPE_PLUS is the trailing sentinel and
 * is rejected wherever it is evaluated.
 */
typedef enum {
    INT_EXPRESSION = 1,
    DOUBLE_EXPRESSION,
    ADD_EXPRESSION,
    SUB_EXPRESSION,
    MUL_EXPRESSION,
    DIV_EXPRESSION,
    MOD_EXPRESSION,
    EXPRESSION_TYPE_PLUS
} ExpressionType;

/* Expression node: a kind tag plus the literal payload for that kind. */
struct Expression_tag {
ExpressionType type; /* node kind */
union {
int int_value; /* payload when type == INT_EXPRESSION */
double double_value; /* payload when type == DOUBLE_EXPRESSION */
} u;
};
``````

``````/* create.c */
/* Allocates an expression node and sets its type tag. */
Expression *nl_alloc_expression(ExpressionType type);
/* Negates an int or double literal node in place and returns the same node. */
Expression *nl_create_minus_expression(Expression *exp);
/* Evaluates "left <type> right" immediately, overwrites *left with the result and returns left. */
Expression *nl_create_binary_expression(ExpressionType type, Expression *left, Expression *right);

/* eval.c */
/* Evaluates both operands and applies the operator; an int/double mix is computed as double. */
NL_Value nl_eval_binary_expression(ExpressionType operator, Expression *left, Expression *right);
/* Evaluates a single expression node. */
NL_Value nl_eval_expression(Expression *exp);
/* Prints an int or double value to stdout, prefixed with "--> ". */
void nl_print_value(NL_Value *v);
``````

## 方法定义

`create.c`文件，完整文件在这里

``````#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "nl.h"
``````

``````/*
 * Allocate a new expression node and set its type tag.
 *
 * The payload union is left uninitialised; the caller fills in the member
 * that matches the requested type.
 *
 * Returns a heap-allocated node (never NULL). If the allocation fails the
 * program reports a runtime error and exits with status 1, in line with the
 * other runtime errors in this code, instead of dereferencing a NULL pointer.
 */
Expression *
nl_alloc_expression(ExpressionType type) {
    /* sizeof *exp keeps the size tied to the pointer's own type. */
    Expression *exp = malloc(sizeof *exp);

    if (exp == NULL) {
        printf("[runtime error] out of memory while allocating expression\n");
        exit(1);
    }
    exp->type = type;

    return exp;
}
``````

``````/*
 * Build a literal expression (returned by value) from an evaluated value.
 *
 * INT_VALUE becomes an INT_EXPRESSION and DOUBLE_VAULE becomes a
 * DOUBLE_EXPRESSION carrying the same number. Any other tag is reported as a
 * runtime error and terminates the program with status 1.
 */
static Expression
convert_value_to_expression(NL_Value *v) {
    Expression converted;

    switch (v->type) {
    case INT_VALUE:
        converted.type = INT_EXPRESSION;
        converted.u.int_value = v->u.int_value;
        break;
    case DOUBLE_VAULE:
        converted.type = DOUBLE_EXPRESSION;
        converted.u.double_value = v->u.double_value;
        break;
    default:
        printf("[runtime error] convert value with unexpected type:%d\n", v->type);
        exit(1);
    }

    return converted;
}
``````

``````/*
 * Fold "left <type> right" into a single literal node.
 *
 * The operation is evaluated right away; the result overwrites *left, and
 * left is returned. The right node is only read: it is neither modified nor
 * freed here.
 */
Expression *
nl_create_binary_expression(ExpressionType type, Expression *left, Expression *right) {
    NL_Value folded = nl_eval_binary_expression(type, left, right);
    Expression literal = convert_value_to_expression(&folded);

    *left = literal;
    return left;
}
``````

``````/*
 * Apply unary minus to a literal node in place.
 *
 * Int and double literals have their stored value negated; a node of any
 * other kind is left untouched. The same pointer is returned in every case.
 */
Expression *
nl_create_minus_expression(Expression *exp) {
    switch (exp->type) {
    case INT_EXPRESSION:
        exp->u.int_value = -exp->u.int_value;
        break;
    case DOUBLE_EXPRESSION:
        exp->u.double_value = -exp->u.double_value;
        break;
    default:
        /* not a numeric literal: nothing to negate */
        break;
    }

    return exp;
}
``````

`create.c`就只定义了这几个方法。

`eval.c`文件，完整文件在这里

``````#include "nl.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
``````

``````static NL_Value
eval_int_expression(int value) {
NL_Value v;
v.type = INT_VALUE;
v.u.int_value = value;
return v;
}

static NL_Value
eval_double_expression(double value) {
NL_Value v;
v.type = DOUBLE_VAULE;
v.u.double_value = value;
return v;
}
``````

``````/*
 * Evaluate a single expression node.
 *
 * Only literal nodes are accepted: an INT_EXPRESSION yields an INT_VALUE and
 * a DOUBLE_EXPRESSION yields a DOUBLE_VAULE. Every other node kind (the
 * operator kinds and the sentinel included) is reported as a runtime error
 * and terminates the program with status 1.
 */
static NL_Value
eval_expression(Expression *exp) {
    if (exp->type == INT_EXPRESSION) {
        return eval_int_expression(exp->u.int_value);
    }
    if (exp->type == DOUBLE_EXPRESSION) {
        return eval_double_expression(exp->u.double_value);
    }

    printf("[runtime error] eval expression with unexpected type:%d\n", exp->type);
    exit(1);
}
``````

``````/*
 * Apply a binary arithmetic operator to two int operands.
 *
 * operator  one of ADD/SUB/MUL/DIV/MOD_EXPRESSION
 * left      left-hand operand
 * right     right-hand operand
 * result    receives the outcome, tagged INT_VALUE
 *
 * The addition branch needs its own "case ADD_EXPRESSION" label; without it
 * the switch is not valid C. Division and remainder by zero are undefined
 * behaviour for ints, so they are reported as a runtime error. Any operator
 * that is not an arithmetic one is a runtime error as well. All runtime
 * errors terminate the program with status 1.
 */
static void
eval_binary_int(ExpressionType operator, int left, int right, NL_Value *result) {
    result->type = INT_VALUE;

    switch (operator) {
    case ADD_EXPRESSION: {
        result->u.int_value = left + right;
        break;
    }
    case SUB_EXPRESSION: {
        result->u.int_value = left - right;
        break;
    }
    case MUL_EXPRESSION: {
        result->u.int_value = left * right;
        break;
    }
    case DIV_EXPRESSION: {
        if (right == 0) {
            printf("[runtime error] integer division by zero\n");
            exit(1);
        }
        result->u.int_value = left / right;
        break;
    }
    case MOD_EXPRESSION: {
        if (right == 0) {
            printf("[runtime error] integer modulo by zero\n");
            exit(1);
        }
        result->u.int_value = left % right;
        break;
    }
    case INT_EXPRESSION:
    case DOUBLE_EXPRESSION:
    case EXPRESSION_TYPE_PLUS:
    default: {
        printf("[runtime error] eval binary int with unexpected type:%d\n", operator);
        exit(1);
    }
    }
}

/*
 * Apply a binary arithmetic operator to two double operands.
 *
 * operator  one of ADD/SUB/MUL/DIV/MOD_EXPRESSION
 * left      left-hand operand
 * right     right-hand operand
 * result    receives the outcome, tagged DOUBLE_VAULE
 *
 * The addition branch needs its own "case ADD_EXPRESSION" label; without it
 * the switch is not valid C. The remainder is computed with fmod(). Any
 * operator that is not an arithmetic one is reported as a runtime error and
 * terminates the program with status 1; the message names this function
 * ("double") so it can be told apart from the int variant.
 */
static void
eval_binary_double(ExpressionType operator, double left, double right, NL_Value *result) {
    result->type = DOUBLE_VAULE;

    switch (operator) {
    case ADD_EXPRESSION: {
        result->u.double_value = left + right;
        break;
    }
    case SUB_EXPRESSION: {
        result->u.double_value = left - right;
        break;
    }
    case MUL_EXPRESSION: {
        result->u.double_value = left * right;
        break;
    }
    case DIV_EXPRESSION: {
        result->u.double_value = left / right;
        break;
    }
    case MOD_EXPRESSION: {
        result->u.double_value = fmod(left, right);
        break;
    }
    case INT_EXPRESSION:
    case DOUBLE_EXPRESSION:
    case EXPRESSION_TYPE_PLUS:
    default: {
        printf("[runtime error] eval binary double with unexpected type:%d\n", operator);
        exit(1);
    }
    }
}
``````

``````/*
 * Evaluate "left <operator> right".
 *
 * Both operands are evaluated first (left, then right). Two ints are combined
 * with int arithmetic; in every other valid combination an int operand is
 * converted to double and double arithmetic is used. An operand whose value
 * tag is neither INT_VALUE nor DOUBLE_VAULE is reported as a runtime error
 * and terminates the program with status 1.
 */
NL_Value
nl_eval_binary_expression(ExpressionType operator, Expression *left, Expression *right) {
    NL_Value lhs = eval_expression(left);
    NL_Value rhs = eval_expression(right);
    NL_Value result;
    const int lhs_is_int = (lhs.type == INT_VALUE);
    const int rhs_is_int = (rhs.type == INT_VALUE);

    /* Reject anything that is not a known numeric tag before computing. */
    if ((!lhs_is_int && lhs.type != DOUBLE_VAULE) || (!rhs_is_int && rhs.type != DOUBLE_VAULE)) {
        printf("[runtime error] eval binary expression with unexpected type, left:%d, right:%d\n", lhs.type, rhs.type);
        exit(1);
    }

    if (lhs_is_int && rhs_is_int) {
        eval_binary_int(operator, lhs.u.int_value, rhs.u.int_value, &result);
    } else {
        /* At least one side is a double: promote the int side, if any. */
        double lhs_number = lhs_is_int ? lhs.u.int_value : lhs.u.double_value;
        double rhs_number = rhs_is_int ? rhs.u.int_value : rhs.u.double_value;

        eval_binary_double(operator, lhs_number, rhs_number, &result);
    }

    return result;
}
``````

``````/* Public entry point for evaluating one expression node; forwards to the file-local eval_expression(). */
NL_Value
nl_eval_expression(Expression *exp) {
    NL_Value value = eval_expression(exp);

    return value;
}
``````

``````/*
 * Print a value to stdout as "--> <number>" followed by a newline.
 *
 * Ints are printed with %d and doubles with %lf; a value carrying any other
 * tag produces no output.
 */
void
nl_print_value(NL_Value *v) {
    switch (v->type) {
    case INT_VALUE:
        printf("--> %d\n", v->u.int_value);
        break;
    case DOUBLE_VAULE:
        printf("--> %lf\n", v->u.double_value);
        break;
    default:
        /* unknown tag: print nothing */
        break;
    }
}
``````

## YACC

`YACC`文件分成三部分，先说第一部分

``````%{
#include <stdio.h>
#include <stdlib.h>
#include "nl.h"
int yylex();
int yyerror(char const *str);
%}
``````

``````%union {
Expression *expression;
}
``````

``````%token ADD SUB MUL DIV LP RP MOD CR
%token <expression> INT_LITERAL DOUBLE_LITERAL
``````

``````%type <expression> primary_expression mult_expression add_expression expression
%%
``````

``````/* Start of the grammar: one or more expressions, collected left-recursively. */
expression_list
: expression
| expression_list expression
;
``````

3 + 6;

4 - 19 - 4;

23 + 45 * 34;

``````expression
/* NOTE(review): the production line(s) that belong between the rule name and
 * this action (": <right-hand side>") are missing from this listing and must
 * be restored from the original nl.y; the rule does not build as shown.
 * The action evaluates the first symbol of the production and prints it. */
{
    /* Semantic values are written as plain $1; a backslash before '$' is not valid yacc. */
    NL_Value v = nl_eval_expression($1);
    nl_print_value(&v);
}
;
``````

`nl_eval_expression`计算表达式的值，返回`NL_Value`类型的值，然后调用`nl_print_value`方法打印出来。

``````/*
 * Additive level: a mult_expression, optionally followed by further
 * "+ mult_expression" / "- mult_expression" parts (left-recursive, so
 * operators of this level group from the left).
 * Each binary alternative needs its own production line in front of its
 * action, and semantic values are written as plain $$ / $1 / $3.
 */
add_expression
: mult_expression
| add_expression ADD mult_expression
{
    $$ = nl_create_binary_expression(ADD_EXPRESSION, $1, $3);
}
| add_expression SUB mult_expression
{
    $$ = nl_create_binary_expression(SUB_EXPRESSION, $1, $3);
}
;
``````

``````/*
 * Multiplicative level: a primary_expression, optionally followed by further
 * "* primary", "/ primary" or "% primary" parts (left-recursive).
 * Semantic values are written as plain $$ / $1 / $3; a backslash before '$'
 * is not valid yacc.
 */
mult_expression
: primary_expression
| mult_expression MUL primary_expression
{
    $$ = nl_create_binary_expression(MUL_EXPRESSION, $1, $3);
}
| mult_expression DIV primary_expression
{
    $$ = nl_create_binary_expression(DIV_EXPRESSION, $1, $3);
}
| mult_expression MOD primary_expression
{
    $$ = nl_create_binary_expression(MOD_EXPRESSION, $1, $3);
}
;
``````

``````/*
 * Primary level: a unary minus applied to a primary, a parenthesised
 * expression, or an int/double literal token.
 * Semantic values are written as plain $$ / $2; a backslash before '$' is
 * not valid yacc.
 */
primary_expression
: SUB primary_expression
{
    $$ = nl_create_minus_expression($2);
}
| LP expression RP
{
    /* The parentheses only group: pass the inner expression through. */
    $$ = $2;
}
| INT_LITERAL
| DOUBLE_LITERAL
;
``````

3

3 + 2

3可以一路向上归约为`add_expression`，同理，2可以归约为`mult_expression`，它们之间有个加号，就满足了`add_expression ADD mult_expression`这个结构，触发该规则定义的动作执行，创建加法表达式结构体变量并返回。

3 + 3 + 4

5 + 4 * 8

5属于`add_expression`，遇到4之后发现它后面是个乘号，加法表达式本身不能处理乘号，只能由乘法表达式处理，所以当遇到后面是一个乘号时，5 + 4不会单独组成`add_expression`，而是会继续看乘号后面的内容，发现是一个8，而4可以属于`mult_expression`，8属于`primary_expression`，两者加上中间的乘号满足`mult_expression MUL primary_expression`，会生成并返回`mult_expression`，前面的5属于`add_expression`，这样5加上后面的乘法表达式，又构成一个`add_expression`，又触发`add_expression`的计算。

4 * 5 * (1 + 6)

4 * 5会构成`mult_expression`，因为5后面跟乘号不影响前面4 * 5先组合，然后遇到括号，根据定义，它会认为遇到了`primary_expression`，并试图在遇到右括号之前构建一个`expression`，然后它会像从头解析一样最终解析出了1 + 6是一个`add_expression`，而`add_expression`也属于`expression`，加上左右括号构成`primary_expression`，因为前面的4 * 5属于`mult_expression`，所以又符合了`mult_expression MUL primary_expression`结构，然后触发生成逻辑，接着可以一路向上归约。

`yacc`文件还有最后一部分，这里就定义了yyerror方法，第一部分声明过了，这里定义，在语法分析报错时执行该方法

``````%%
/*
 * Error callback for the generated parser.
 *
 * Writes "parse error near <text>" to stderr, where <text> is the scanner's
 * current yytext. The message passed in by the parser is not used.
 * Always returns 0.
 */
int yyerror(char const *str) {
    extern char *yytext;

    (void)str; /* the parser's own message is intentionally ignored */
    fprintf(stderr, "parse error near %s\n", yytext);

    return 0;
}
``````

yacc -dv nl.y

lex nl.l

## 编译执行

``````#include <stdio.h>
#include <stdlib.h>

/*
 * Program entry point: parse standard input with the generated parser.
 *
 * Points the scanner's input stream (yyin) at stdin and runs yyparse().
 * A non-zero result from yyparse() is reported on stderr and the program
 * exits with status 1; otherwise main returns 0.
 */
int main(int argc, char **argv) {
    extern FILE *yyin;
    extern int yyparse(void);

    (void)argc;
    (void)argv;

    yyin = stdin;
    if (yyparse() != 0) {
        fprintf(stderr, "Error ! \n");
        exit(1);
    }

    return 0;
}
``````

`main`方法里面声明了外部方法`yyparse`和用于指定输入来源的外部变量`yyin`，把`yyin`赋值为`stdin`，表示读入的是标准输入的内容，通常就是键盘输入的内容。

4 + 8 + 9 * 4 * (4+4*3+(3)+ -4)