这是一个系列文章,旨在了解如何使用Flex/Lex和Yacc/Bison进行词法和语法解析。在前面,已经完成了使用Lex/flex做基础的词法解析、实现一个简单的计算器、flex/bison系列3:更复杂的一个编译程序实现(上)。在上篇中,已经完成语法规则、主要的数据结构设计。这里就继续完成程序,最后编译测试。
回顾
这个系列我们需要通过flex/bison实现一个编译程序,能够实现一种简单的程序语言,这个程序语言包含了:基础运算、变量与赋值、表达式计算、if
语句、while
语句、print
语句等。例如,使用该程序语言,我们可以实现如下程序:
i = 1;
a = 0;
while ( a < 100 ) {
i = i + 1;
a = a + i;
}
print i;
该程序解决的问题是:在自然数序列(1、2、4…)中,前面多少个自然数的和首次大于100。你可以使用上面定义的语言,编写自己的程序。
好了,接着前面三篇的内容,我们继续完成该语言的编译程序。
主要的函数实现
build_node函数
t_node* build_node(enum NODETYPE nt,t_node* left,t_node* right, int i){
debug_print(__FILE__,__LINE__,__func__,"");
t_node *t_n;
t_n = NULL;
t_n = (t_node *)malloc(sizeof(t_node));
if (t_n == NULL){
printf("Out of Memory\n");
exit(1);
}
t_n->nt = nt;
t_n->left = left;
t_n->right = right;
t_n->i = i;
return t_n;
}
exec_node函数
int exec_node(t_node *n){
if( n == NULL ) return 0;
debug_print(__FILE__,__LINE__,__func__,"enter exec_node");
switch(n->nt){
case NT_INTEGER:
debug_print(__FILE__,__LINE__,__func__,"NT_INTEGER node");
break;
case NT_VAR_NAME:
debug_print(__FILE__,__LINE__,__func__,"NT_VAR_NAME node");
break;
case NT_O_ADD:
exec_node(n->left);
exec_node(n->right);
n->i = get_node_ret(n->left) + get_node_ret(n->right);
debug_print(__FILE__,__LINE__,__func__,"NT_O_ADD node");
break;
case NT_O_MINUS:
exec_node(n->left);
exec_node(n->right);
n->i = get_node_ret(n->left) - get_node_ret(n->right);
debug_print(__FILE__,__LINE__,__func__,"NT_O_MINUS node");
break;
case NT_O_MULTIPLY:
debug_print(__FILE__,__LINE__,__func__,"NT_O_MULTIPLY node");
exec_node(n->left);
exec_node(n->right);
n->i = get_node_ret(n->left) * get_node_ret(n->right);
break;
case NT_BOOL_EXPR_GT:
debug_print(__FILE__,__LINE__,__func__,"NT_BOOL_EXPR_GT node");
n->i = 0;
exec_node(n->left);
exec_node(n->right);
if (get_node_ret(n->left) > get_node_ret(n->right) )
{ n->i = 1 ; }
break;
case NT_BOOL_EXPR_LT:
debug_print(__FILE__,__LINE__,__func__,"NT_BOOL_EXPR_LT node");
n->i = 0;
exec_node(n->left);
exec_node(n->right);
if (get_node_ret(n->left) < get_node_ret(n->right) )
{ n->i = 1 ; }
break;
case NT_IF:
debug_print(__FILE__,__LINE__,__func__,"NT_IF node");
exec_node(n->left);
if (get_node_ret(n->left)){
exec_node(n->right);
}
break;
case NT_WHILE:
debug_print(__FILE__,__LINE__,__func__,"NT_WHILE node");
exec_node(n->left);
while( get_node_ret(n->left) ){
exec_node(n->right);
exec_node(n->left);
}
break;
case NT_PRINT:
debug_print(__FILE__,__LINE__,__func__,"NT_PRINT node");
exec_node(n->left);
printf("print '%d'",get_node_ret(n->left));
break;
case NT_ASSIGNMENT:
debug_print(__FILE__,__LINE__,__func__,"NT_ASSIGNMENT node");
exec_node(n->left);
exec_node(n->right);
var[n->left->i - 'a'] = get_node_ret(n->right);
break;
case NT_STATEMENT_BLOCK:
debug_print(__FILE__,__LINE__,__func__,"NT_STATEMENT_BLOCK node");
exec_node(n->left);
exec_node(n->right);
break;
case NT_PROGRAM:
debug_print(__FILE__,__LINE__,__func__,"NT_PROGRAM node");
break;
}
return 0;
}
内存释放
int free_node(t_node *n){
if( n != NULL ) {
free_node(n->left);
free_node(n->right);
}
free(n);
return 0;
}
工具函数debug_print
void debug_print(const char *fname, int lineno, const char *fxname, const char *debug_info){
#ifdef cal_DEBUG
printf("\n debug: enter at line %d in %s,function: %s info: %s\n",
lineno,
fname,
fxname,
debug_info
);
#endif
}
补充语法文件的Action部分
%%
program: statement_block
{
exec_node($1);
free_node($1);
printf("\n job done! \n");
}
;
statement_block: %empty
{
$$ = build_node(
NT_STATEMENT_BLOCK,
NULL,
NULL,
NULL,
0
);
debug_print(__FILE__,__LINE__,__func__,"");
}
| statement_block statement
{ $$ = build_node(
NT_STATEMENT_BLOCK,
$1,
$2,
NULL,
0
);
debug_print(__FILE__,__LINE__,__func__,"");
}
;
statement: assignment { $$ = $1; }
| print_func { $$ = $1; }
| if_block { $$ = $1; }
| while_block { $$ = $1; }
;
if_block: IF '(' bool_expr ')' '{' statement_block '}'
{
$$ = build_node(
NT_IF,
$3,
$6,
NULL,
0
);
}
while_block: WHILE '(' bool_expr ')' '{' statement_block '}'
{
$$ = build_node(
NT_WHILE,
$3,
$6,
NULL,
0
);
}
assignment: VAR_NAME '=' expression ';' { $$ = build_node(
NT_ASSIGNMENT,
build_node(NT_VAR_NAME,NULL,NULL,NULL,(int)$1),
$3,
NULL,
0);
}
print_func : PRINT expression ';' { $$ = build_node(NT_PRINT,$2,NULL,NULL,0); }
bool_expr: expression GT expression { $$ = build_node(NT_BOOL_EXPR_GT,$1,$3,NULL,0);}
| expression LT expression { $$ = build_node(NT_BOOL_EXPR_LT,$1,$3,NULL,0);}
expression: INTEGER { $$ = build_node(NT_INTEGER,NULL,NULL,NULL,$1); }
| VAR_NAME { $$ = build_node(NT_VAR_NAME,NULL,NULL,NULL,(int)$1); }
| expression O_ADD expression { $$ = build_node(NT_O_ADD,$1,$3,NULL,0);}
| expression O_MINUS expression { $$ = build_node(NT_O_MINUS,$1,$3,NULL,0);}
| expression O_MULTIPLY expression { $$ = build_node(NT_O_MULTIPLY,$1,$3,NULL,0);}
%%
enum NODETYPE{
NT_STATEMENT,
NT_IF,
NT_WHILE,
NT_PROGRAM,
NT_STATEMENT_BLOCK,
NT_O_ADD,
NT_O_MINUS,
NT_O_MULTIPLY,
NT_INTEGER,
NT_VAR_NAME,
NT_BOOL_EXPR_GT,
NT_BOOL_EXPR_LT,
NT_PRINT,
NT_ASSIGNMENT
};
typedef struct t_node{
enum NODETYPE nt;
struct t_node* left;
struct t_node* right;
struct t_node* rrnode;
int i; // for NT_INTEGER NT_VAR_NAME node
}t_node;
有了这些信息,就可以使用NODETYPE
来构建,解析树了。在每次解析到对应节点或进行Reduction时,我们在语法文件的Action部分就可以调用一个build_node
函数来构建对应的节点。我们可以看看如下的程序的解析树结构:
i = 1 ;
a = 0 ;
while ( a < 100 ) {
a = a + i;
i = i + 1;
}
print i ;
这个程序,可以找到,在自然数级数中,到第几项的时候,其和就超过了100。
完整的代码
最后是程序实现的部分,包括
- build_node
- execute_node
- free_node
- get_node_ret
cal.l lex文件
cat cal.l
%{
#include "cal.tab.h"
%}
%option noyywrap
%%
[[:digit:]]+ {
yylval.a = atoi(yytext);
return INTEGER;
}
[a-z] {
yylval.c = yytext[0];
return VAR_NAME;
}
"+" { return O_ADD;};
"-" { return O_MINUS;};
"*" { return O_MULTIPLY;};
"while" {return WHILE;}
"if" {return IF;}
"print" {return PRINT;}
">" {return GT;}
"<" {return LT;}
[();={}] {return yylval.c = *yytext;}
%%
cat cal.header.h
enum NODETYPE{
NT_STATEMENT,
NT_IF,
NT_WHILE,
NT_PROGRAM,
NT_STATEMENT_BLOCK,
NT_O_ADD,
NT_O_MINUS,
NT_O_MULTIPLY,
NT_INTEGER,
NT_VAR_NAME,
NT_BOOL_EXPR_GT,
NT_BOOL_EXPR_LT,
NT_PRINT,
NT_ASSIGNMENT
};
typedef struct t_node{
enum NODETYPE nt;
struct t_node* left;
struct t_node* right;
struct t_node* rrnode;
int i; // for NT_INTEGER NT_VAR_NAME node
}t_node;
cal.y 语言语法文件
cat cal.y
%{
#include <stdio.h>
#include <stdlib.h>
#include "cal.tab.h"
#include "cal.header.h"
// #define cal_DEBUG 1
void debug_print(const char *fname, int lineno, const char *fxname, const char *debug_info){
#ifdef cal_DEBUG
printf("\n debug: enter at line %d in %s,function: %s info: %s\n",
lineno,
fname,
fxname,
debug_info
);
#endif
}
t_node* build_node(enum NODETYPE nt,t_node* left,t_node* right, t_node* r_right , int i){
debug_print(__FILE__,__LINE__,__func__,"");
t_node *t_n;
t_n = NULL;
t_n = (t_node *)malloc(sizeof(t_node));
if (t_n == NULL){
printf("Out of Memory\n");
exit(1);
}
t_n->nt = nt;
t_n->left = left;
t_n->right = right;
t_n->rrnode = r_right;
t_n->i = i;
return t_n;
}
int var[26];
int main (){
int yydebug=1;
yyparse();
return 0;
}
void
yyerror (char const *s)
{
fprintf (stderr, "something error: %s\n over", s);
}
int exec_node(t_node *n){
if( n == NULL ) return 0;
debug_print(__FILE__,__LINE__,__func__,"enter exec_node");
switch(n->nt){
case NT_INTEGER:
debug_print(__FILE__,__LINE__,__func__,"NT_INTEGER node");
break;
case NT_VAR_NAME:
debug_print(__FILE__,__LINE__,__func__,"NT_VAR_NAME node");
break;
case NT_O_ADD:
exec_node(n->left);
exec_node(n->right);
n->i = get_node_ret(n->left) + get_node_ret(n->right);
debug_print(__FILE__,__LINE__,__func__,"NT_O_ADD node");
break;
case NT_O_MINUS:
exec_node(n->left);
exec_node(n->right);
n->i = get_node_ret(n->left) - get_node_ret(n->right);
debug_print(__FILE__,__LINE__,__func__,"NT_O_MINUS node");
break;
case NT_O_MULTIPLY:
debug_print(__FILE__,__LINE__,__func__,"NT_O_MULTIPLY node");
exec_node(n->left);
exec_node(n->right);
n->i = get_node_ret(n->left) * get_node_ret(n->right);
break;
case NT_BOOL_EXPR_GT:
debug_print(__FILE__,__LINE__,__func__,"NT_BOOL_EXPR_GT node");
n->i = 0;
exec_node(n->left);
exec_node(n->right);
if (get_node_ret(n->left) > get_node_ret(n->right) )
{ n->i = 1 ; }
break;
case NT_BOOL_EXPR_LT:
debug_print(__FILE__,__LINE__,__func__,"NT_BOOL_EXPR_LT node");
n->i = 0;
exec_node(n->left);
exec_node(n->right);
if (get_node_ret(n->left) < get_node_ret(n->right) )
{ n->i = 1 ; }
break;
case NT_IF:
debug_print(__FILE__,__LINE__,__func__,"NT_IF node");
exec_node(n->left);
if (get_node_ret(n->left)){
exec_node(n->right);
}
break;
case NT_WHILE:
debug_print(__FILE__,__LINE__,__func__,"NT_WHILE node");
exec_node(n->left);
while( get_node_ret(n->left) ){
exec_node(n->right);
exec_node(n->left);
}
break;
case NT_PRINT:
debug_print(__FILE__,__LINE__,__func__,"NT_PRINT node");
exec_node(n->left);
printf("print '%d'",get_node_ret(n->left));
break;
case NT_ASSIGNMENT:
debug_print(__FILE__,__LINE__,__func__,"NT_ASSIGNMENT node");
exec_node(n->left);
exec_node(n->right);
var[n->left->i - 'a'] = get_node_ret(n->right);
break;
case NT_STATEMENT_BLOCK:
debug_print(__FILE__,__LINE__,__func__,"NT_STATEMENT_BLOCK node");
exec_node(n->left);
exec_node(n->right);
break;
case NT_PROGRAM:
debug_print(__FILE__,__LINE__,__func__,"NT_PROGRAM node");
break;
}
return 0;
}
int get_node_ret(t_node *n){
int r = n->i;
switch(n->nt){
case NT_VAR_NAME:
r = var[n->i - 'a'];
break;
}
return r;
}
int free_node(t_node *n){
if(n != NULL){
// printf("\n try to free memory of node %d \n",n->nt);
}
return 0;
}
%}
%union {
int a; // for integer
char c; // for var_name
int int_bool; // for bool_expr
struct t_node* t_n;
}
%type <t_n> expression bool_expr print_func assignment
%type <t_n> while_block statement statement_block if_block
%token <c> VAR_NAME
%token <a> INTEGER
%token O_ADD O_MINUS O_MULTIPLY
%token GT LT
%token WHILE IF
%token PRINT
%left O_ADD O_MINUS
%left O_MULTIPLY
%%
program: statement_block
{
exec_node($1);
free_node($1);
printf("\n job done! \n");
}
;
statement_block: %empty
{
$$ = build_node(
NT_STATEMENT_BLOCK,
NULL,
NULL,
NULL,
0
);
debug_print(__FILE__,__LINE__,__func__,"");
}
| statement_block statement
{ $$ = build_node(
NT_STATEMENT_BLOCK,
$1,
$2,
NULL,
0
);
debug_print(__FILE__,__LINE__,__func__,"");
}
;
statement: assignment { $$ = $1; }
| print_func { $$ = $1; }
| if_block { $$ = $1; }
| while_block { $$ = $1; }
;
if_block: IF '(' bool_expr ')' '{' statement_block '}'
{
$$ = build_node(
NT_IF,
$3,
$6,
NULL,
0
);
}
while_block: WHILE '(' bool_expr ')' '{' statement_block '}'
{
$$ = build_node(
NT_WHILE,
$3,
$6,
NULL,
0
);
}
assignment: VAR_NAME '=' expression ';' { $$ = build_node(
NT_ASSIGNMENT,
build_node(NT_VAR_NAME,NULL,NULL,NULL,(int)$1),
$3,
NULL,
0);
}
print_func : PRINT expression ';' { $$ = build_node(NT_PRINT,$2,NULL,NULL,0); }
bool_expr: expression GT expression { $$ = build_node(NT_BOOL_EXPR_GT,$1,$3,NULL,0);}
| expression LT expression { $$ = build_node(NT_BOOL_EXPR_LT,$1,$3,NULL,0);}
expression: INTEGER { $$ = build_node(NT_INTEGER,NULL,NULL,NULL,$1); }
| VAR_NAME { $$ = build_node(NT_VAR_NAME,NULL,NULL,NULL,(int)$1); }
| expression O_ADD expression { $$ = build_node(NT_O_ADD,$1,$3,NULL,0);}
| expression O_MINUS expression { $$ = build_node(NT_O_MINUS,$1,$3,NULL,0);}
| expression O_MULTIPLY expression { $$ = build_node(NT_O_MULTIPLY,$1,$3,NULL,0);}
%%
编译与执行
lex cal.l && \
bison -d cal.y && \
gcc cal.tab.c lex.yy.c -o a.out && \
./a.out < p.f.txt
最后,需要注意,该程序更注重的是测试与实现,所以在“内存释放”可能会存在一些泄露的问题。