flex/bison系列4:更复杂的一个编译程序实现(下)

这是一个系列文章,旨在了解如何使用Flex/Lex和Yacc/Bison进行词法和语法解析。在前面,已经完成了使用Lex/flex做基础的词法解析实现一个简单的计算器flex/bison系列3:更复杂的一个编译程序实现(上)。在上篇中,已经完成语法规则、主要的数据结构设计。这里就继续完成程序,最后编译测试。

回顾

这个系列我们需要通过flex/bison实现一个编译程序,能够实现一种简单的程序语言,这个程序语言包含了:基础运算、变量与赋值、表达式计算、if语句、while语句、print语句等。例如,使用该程序语言,我们可以实现如下程序:

i = 1;
a = 0;
while ( a < 100 ) {
  i = i + 1;
  a = a + i;
}
print i;

该程序解决的问题是:在自然数序列(1、2、4…)中,前面多少个自然数的和首次大于100。你可以使用上面定义的语言,编写自己的程序。

好了,接着前面三篇的内容,我们继续完成该语言的编译程序。

主要的函数实现

build_node函数
t_node* build_node(enum NODETYPE nt,t_node* left,t_node* right, int i){
    debug_print(__FILE__,__LINE__,__func__,"");
    t_node *t_n;
    t_n = NULL;
    t_n = (t_node *)malloc(sizeof(t_node));
    if (t_n == NULL){
        printf("Out of Memory\n");
        exit(1);
    }
    t_n->nt = nt;
    t_n->left = left;
    t_n->right = right;
    t_n->i = i;
    return t_n;
}
exec_node函数
int exec_node(t_node *n){
    if( n == NULL ) return 0;
    debug_print(__FILE__,__LINE__,__func__,"enter exec_node");

    switch(n->nt){
        case NT_INTEGER:
    	    debug_print(__FILE__,__LINE__,__func__,"NT_INTEGER node");
	    break;
        case NT_VAR_NAME:
    	    debug_print(__FILE__,__LINE__,__func__,"NT_VAR_NAME node");
	    break;
        case NT_O_ADD:
            exec_node(n->left);
            exec_node(n->right);
            n->i = get_node_ret(n->left) + get_node_ret(n->right);
    	    debug_print(__FILE__,__LINE__,__func__,"NT_O_ADD node");
	    break;
        case NT_O_MINUS:
            exec_node(n->left);
            exec_node(n->right);
            n->i = get_node_ret(n->left) - get_node_ret(n->right);
    	    debug_print(__FILE__,__LINE__,__func__,"NT_O_MINUS node");
            break;
        case NT_O_MULTIPLY:
    	    debug_print(__FILE__,__LINE__,__func__,"NT_O_MULTIPLY node");
            exec_node(n->left);
            exec_node(n->right);
            n->i = get_node_ret(n->left) * get_node_ret(n->right);
            break;
        case NT_BOOL_EXPR_GT:
    	    debug_print(__FILE__,__LINE__,__func__,"NT_BOOL_EXPR_GT node");
            n->i = 0;
            exec_node(n->left);
            exec_node(n->right);
            if (get_node_ret(n->left) > get_node_ret(n->right) )
            { n->i = 1 ; }
            break;
        case NT_BOOL_EXPR_LT:
    	    debug_print(__FILE__,__LINE__,__func__,"NT_BOOL_EXPR_LT node");
            n->i = 0;
            exec_node(n->left);
            exec_node(n->right);
            if (get_node_ret(n->left) < get_node_ret(n->right) )
            { n->i = 1 ; }
            break;
        case NT_IF:
    	    debug_print(__FILE__,__LINE__,__func__,"NT_IF node");
            exec_node(n->left);
            if (get_node_ret(n->left)){
                exec_node(n->right);
            }
            break;
        case NT_WHILE:
    	    debug_print(__FILE__,__LINE__,__func__,"NT_WHILE node");
            exec_node(n->left);
            while( get_node_ret(n->left)  ){
                exec_node(n->right);
                exec_node(n->left);
            }
            break;
        case NT_PRINT:
            debug_print(__FILE__,__LINE__,__func__,"NT_PRINT node");
            exec_node(n->left);
            printf("print '%d'",get_node_ret(n->left));
            break;
        case NT_ASSIGNMENT:
    	    debug_print(__FILE__,__LINE__,__func__,"NT_ASSIGNMENT node");
            exec_node(n->left);
            exec_node(n->right);
            var[n->left->i - 'a'] = get_node_ret(n->right);
            break;
        case NT_STATEMENT_BLOCK:
    	    debug_print(__FILE__,__LINE__,__func__,"NT_STATEMENT_BLOCK node");
            exec_node(n->left);
            exec_node(n->right);
            break;
        case NT_PROGRAM:
    	    debug_print(__FILE__,__LINE__,__func__,"NT_PROGRAM node");
            break;
    }

    return 0;
}
内存释放
int free_node(t_node *n){
    if( n != NULL ) {
        free_node(n->left);
        free_node(n->right);
    }
    free(n);
    return 0;
}
工具函数debug_print
void debug_print(const char *fname, int lineno, const char *fxname, const char *debug_info){
    #ifdef cal_DEBUG
    printf("\n debug: enter at line %d in %s,function: %s info: %s\n",
        lineno,
        fname,
        fxname,
		debug_info
        );
	#endif
}

补充语法文件的Action部分

%%
program:  statement_block
            {
                exec_node($1);
                free_node($1);
                printf("\n job done! \n");
            }
;

statement_block: %empty
            {
                $$ = build_node(
                        NT_STATEMENT_BLOCK,
                        NULL,
                        NULL,
                        NULL,
                        0
                        );
                debug_print(__FILE__,__LINE__,__func__,"");
            }

        | statement_block statement
            { $$ = build_node(
                    NT_STATEMENT_BLOCK,
                    $1,
                    $2,
                    NULL,
                    0
                    );

                debug_print(__FILE__,__LINE__,__func__,"");
            }
;

statement: assignment { $$ = $1; }
        | print_func  { $$ = $1; }
        | if_block    { $$ = $1; }
        | while_block { $$ = $1; }
;

if_block: IF '(' bool_expr ')' '{' statement_block '}'
            {
                $$ = build_node(
                        NT_IF,
                        $3,
                        $6,
                        NULL,
                        0
                        );
            }

while_block: WHILE '('  bool_expr ')' '{' statement_block '}'
            {
                $$ = build_node(
                        NT_WHILE,
                        $3,
                        $6,
                        NULL,
                        0
                        );
            }

assignment: VAR_NAME '=' expression ';' { $$ = build_node(
                                                    NT_ASSIGNMENT,
                                                    build_node(NT_VAR_NAME,NULL,NULL,NULL,(int)$1),
                                                    $3,
                                                    NULL,
                                                    0);
                                        }

print_func : PRINT expression ';'   {  $$ = build_node(NT_PRINT,$2,NULL,NULL,0); }

bool_expr: expression GT expression {  $$ = build_node(NT_BOOL_EXPR_GT,$1,$3,NULL,0);}
        |  expression LT expression {  $$ = build_node(NT_BOOL_EXPR_LT,$1,$3,NULL,0);}

expression: INTEGER { $$ = build_node(NT_INTEGER,NULL,NULL,NULL,$1); }
        | VAR_NAME  { $$ = build_node(NT_VAR_NAME,NULL,NULL,NULL,(int)$1); }
        | expression O_ADD expression {  $$ = build_node(NT_O_ADD,$1,$3,NULL,0);}
        | expression O_MINUS expression  {  $$ = build_node(NT_O_MINUS,$1,$3,NULL,0);}
        | expression O_MULTIPLY expression {  $$ = build_node(NT_O_MULTIPLY,$1,$3,NULL,0);}


%%

cal.header.h

enum NODETYPE{
    NT_STATEMENT,
    NT_IF,
    NT_WHILE,
    NT_PROGRAM,
    NT_STATEMENT_BLOCK,
    NT_O_ADD,
    NT_O_MINUS,
    NT_O_MULTIPLY,
    NT_INTEGER,
    NT_VAR_NAME,
    NT_BOOL_EXPR_GT,
    NT_BOOL_EXPR_LT,
    NT_PRINT,
    NT_ASSIGNMENT
};

typedef struct t_node{
    enum NODETYPE nt;
    struct t_node* left;
    struct t_node* right;
    struct t_node* rrnode;
    int i;  // for NT_INTEGER NT_VAR_NAME node
}t_node;

有了这些信息,就可以使用NODETYPE来构建,解析树了。在每次解析到对应节点或进行Reduction时,我们在语法文件的Action部分就可以调用一个build_node函数来构建对应的节点。我们可以看看如下的程序的解析树结构:

i = 1 ;
a = 0 ;
while ( a < 100 ) {
    a = a + i;
    i = i + 1;
}
print i ;

这个程序,可以找到,在自然数级数中,到第几项的时候,其和就超过了100。

完整的代码

最后是程序实现的部分,包括

  • build_node
  • execute_node
  • free_node
  • get_node_ret

cal.l lex文件
cat cal.l

%{
    #include "cal.tab.h"
%}
%option noyywrap
%%
[[:digit:]]+ {
    yylval.a = atoi(yytext);
    return INTEGER;
}

[a-z] {
    yylval.c = yytext[0];
    return VAR_NAME;
}

"+" { return O_ADD;};
"-" { return O_MINUS;};
"*" { return O_MULTIPLY;};

"while"  {return WHILE;}
"if"  {return IF;}
"print"  {return PRINT;}

">" {return GT;}
"<" {return LT;}

[();={}]  {return yylval.c = *yytext;}

%%
cal.header.h 头文件/数据结构定义
cat cal.header.h

enum NODETYPE{
    NT_STATEMENT,
    NT_IF,
    NT_WHILE,
    NT_PROGRAM,
    NT_STATEMENT_BLOCK,
    NT_O_ADD,
    NT_O_MINUS,
    NT_O_MULTIPLY,
    NT_INTEGER,
    NT_VAR_NAME,
    NT_BOOL_EXPR_GT,
    NT_BOOL_EXPR_LT,
    NT_PRINT,
    NT_ASSIGNMENT
};

typedef struct t_node{
    enum NODETYPE nt;
    struct t_node* left;
    struct t_node* right;
    struct t_node* rrnode;
    int i;  // for NT_INTEGER NT_VAR_NAME node
}t_node;
cal.y 语言语法文件
cat cal.y
%{

#include <stdio.h>
#include <stdlib.h>
#include "cal.tab.h"
#include "cal.header.h"

// #define cal_DEBUG 1

void debug_print(const char *fname, int lineno, const char *fxname, const char *debug_info){
    #ifdef cal_DEBUG
    printf("\n debug: enter at line %d in %s,function: %s info: %s\n",
        lineno,
        fname,
        fxname,
		debug_info
        );
	#endif
}


t_node* build_node(enum NODETYPE nt,t_node* left,t_node* right, t_node* r_right , int i){
    debug_print(__FILE__,__LINE__,__func__,"");
    t_node *t_n;
    t_n = NULL;
    t_n = (t_node *)malloc(sizeof(t_node));
    if (t_n == NULL){
        printf("Out of Memory\n");
        exit(1);
    }
	t_n->nt = nt;
	t_n->left = left;
	t_n->right = right;
	t_n->rrnode = r_right;
	t_n->i = i;
    return t_n;
}


int var[26];

int main (){
    int yydebug=1;
    yyparse();
    return 0;
}

void
yyerror (char const *s)
{
  fprintf (stderr, "something error: %s\n over", s);
}


int exec_node(t_node *n){
    if( n == NULL ) return 0;
    debug_print(__FILE__,__LINE__,__func__,"enter exec_node");

    switch(n->nt){
        case NT_INTEGER:
    		debug_print(__FILE__,__LINE__,__func__,"NT_INTEGER node");
			break;
        case NT_VAR_NAME:
    		debug_print(__FILE__,__LINE__,__func__,"NT_VAR_NAME node");
			break;
        case NT_O_ADD:
            exec_node(n->left);
            exec_node(n->right);
            n->i = get_node_ret(n->left) + get_node_ret(n->right);
    		debug_print(__FILE__,__LINE__,__func__,"NT_O_ADD node");
			break;
        case NT_O_MINUS:
            exec_node(n->left);
            exec_node(n->right);
            n->i = get_node_ret(n->left) - get_node_ret(n->right);
    		debug_print(__FILE__,__LINE__,__func__,"NT_O_MINUS node");
            break;
        case NT_O_MULTIPLY:
    		debug_print(__FILE__,__LINE__,__func__,"NT_O_MULTIPLY node");
            exec_node(n->left);
            exec_node(n->right);
            n->i = get_node_ret(n->left) * get_node_ret(n->right);
            break;
        case NT_BOOL_EXPR_GT:
    		debug_print(__FILE__,__LINE__,__func__,"NT_BOOL_EXPR_GT node");
            n->i = 0;
            exec_node(n->left);
            exec_node(n->right);
            if (get_node_ret(n->left) > get_node_ret(n->right) )
            { n->i = 1 ; }
            break;
        case NT_BOOL_EXPR_LT:
    		debug_print(__FILE__,__LINE__,__func__,"NT_BOOL_EXPR_LT node");
            n->i = 0;
            exec_node(n->left);
            exec_node(n->right);
            if (get_node_ret(n->left) < get_node_ret(n->right) )
            { n->i = 1 ; }
            break;
        case NT_IF:
    		debug_print(__FILE__,__LINE__,__func__,"NT_IF node");
            exec_node(n->left);
            if (get_node_ret(n->left)){
                exec_node(n->right);
            }
            break;
        case NT_WHILE:
    		debug_print(__FILE__,__LINE__,__func__,"NT_WHILE node");
            exec_node(n->left);
            while( get_node_ret(n->left)  ){
                exec_node(n->right);
                exec_node(n->left);
            }
            break;
        case NT_PRINT:
    		debug_print(__FILE__,__LINE__,__func__,"NT_PRINT node");
            exec_node(n->left);
            printf("print '%d'",get_node_ret(n->left));
            break;
        case NT_ASSIGNMENT:
    		debug_print(__FILE__,__LINE__,__func__,"NT_ASSIGNMENT node");
            exec_node(n->left);
            exec_node(n->right);
            var[n->left->i - 'a'] = get_node_ret(n->right);
            break;
        case NT_STATEMENT_BLOCK:
    		debug_print(__FILE__,__LINE__,__func__,"NT_STATEMENT_BLOCK node");
            exec_node(n->left);
            exec_node(n->right);
            break;
        case NT_PROGRAM:
    		debug_print(__FILE__,__LINE__,__func__,"NT_PROGRAM node");
            break;
    }

    return 0;
}

int get_node_ret(t_node *n){
    int r = n->i;
    switch(n->nt){
        case NT_VAR_NAME:
            r = var[n->i - 'a'];
            break;
    }
    return r;
}

int free_node(t_node *n){
    if(n != NULL){
        // printf("\n try to free memory of node %d \n",n->nt);
    }
    return 0;
}


%}

%union {
    int a;  // for integer
    char c; // for var_name
    int int_bool; // for bool_expr
    struct t_node* t_n;
}


%type <t_n> expression bool_expr print_func assignment
%type <t_n> while_block statement statement_block if_block

%token <c> VAR_NAME
%token <a> INTEGER

%token O_ADD O_MINUS O_MULTIPLY

%token GT LT

%token WHILE IF
%token PRINT

%left O_ADD O_MINUS
%left O_MULTIPLY

%%
program:  statement_block
            {
                exec_node($1);
                free_node($1);
                printf("\n job done! \n");
            }
;

statement_block: %empty
            {
                $$ = build_node(
                        NT_STATEMENT_BLOCK,
                        NULL,
                        NULL,
                        NULL,
                        0
                        );
                debug_print(__FILE__,__LINE__,__func__,"");
            }

        | statement_block statement
            { $$ = build_node(
                    NT_STATEMENT_BLOCK,
                    $1,
                    $2,
                    NULL,
                    0
                    );

                debug_print(__FILE__,__LINE__,__func__,"");
            }
;

statement: assignment { $$ = $1; }
        | print_func  { $$ = $1; }
        | if_block    { $$ = $1; }
        | while_block { $$ = $1; }
;

if_block: IF '(' bool_expr ')' '{' statement_block '}'
            {
                $$ = build_node(
                        NT_IF,
                        $3,
                        $6,
                        NULL,
                        0
                        );
            }

while_block: WHILE '('  bool_expr ')' '{' statement_block '}'
            {
                $$ = build_node(
                        NT_WHILE,
                        $3,
                        $6,
                        NULL,
                        0
                        );
            }

assignment: VAR_NAME '=' expression ';' { $$ = build_node(
                                                    NT_ASSIGNMENT,
                                                    build_node(NT_VAR_NAME,NULL,NULL,NULL,(int)$1),
                                                    $3,
                                                    NULL,
                                                    0);
                                        }

print_func : PRINT expression ';'   {  $$ = build_node(NT_PRINT,$2,NULL,NULL,0); }

bool_expr: expression GT expression {  $$ = build_node(NT_BOOL_EXPR_GT,$1,$3,NULL,0);}
        |  expression LT expression {  $$ = build_node(NT_BOOL_EXPR_LT,$1,$3,NULL,0);}

expression: INTEGER { $$ = build_node(NT_INTEGER,NULL,NULL,NULL,$1); }
        | VAR_NAME  { $$ = build_node(NT_VAR_NAME,NULL,NULL,NULL,(int)$1); }
        | expression O_ADD expression {  $$ = build_node(NT_O_ADD,$1,$3,NULL,0);}
        | expression O_MINUS expression  {  $$ = build_node(NT_O_MINUS,$1,$3,NULL,0);}
        | expression O_MULTIPLY expression {  $$ = build_node(NT_O_MULTIPLY,$1,$3,NULL,0);}


%%

编译与执行

lex cal.l && \
bison -d cal.y && \
gcc cal.tab.c lex.yy.c -o a.out && \
./a.out < p.f.txt

最后,需要注意,该程序更注重的是测试与实现,所以在“内存释放”可能会存在一些泄露的问题。

Leave a Reply

Your email address will not be published. Required fields are marked *