Adding More Operators and Operator Precedence

When the Parser was first set up it only was able to recognize the =, +, -, *, and / operators. More operators are needed to create a C compiler such as logical and bitwise operators. First, the Lexer must be updated to return tokens for these operators. For this, first the enum of operator types was expanded:

typedef enum Operator_t
{
	PLUSop,
	MINUSop,
	MULTop,
	DIVop,
	MODop,
	EQop,
	NOTEQop,
	GTop,
	LTop,
	GTEQop,
	LTEQop,
	LSHIFTop,
	RSHIFTop,
	BITANDop,
	BITXORop,
	BITORop,
	LOGANDop,
	LOGORop,
	ASSIGNop,
	PLUS_ASSIGNop,
	MIN_ASSIGNop,
	MULT_ASSIGNop,
	DIV_ASSIGNop,
	MOD_ASSIGNop,
	LSHIFT_ASSIGNop,
	RSHIFT_ASSIGNop,
	BITAND_ASSIGNop,
	BITXOR_ASSIGNop,
	BITOR_ASSIGNop
} Operator;

Then, the Lexer was updated to return tokens for each of the operators. Note that these operators now also return an integer to be used to set the operator field in an expression node:

%%

"+" {
    yylval.intval = PLUSop;
    return PLUS;
}
"-" {
    yylval.intval = MINUSop;
    return MINUS;
}
"/" {
    yylval.intval = DIVop;
    return DIVIDE;
}
"*" {
    yylval.intval = MULTop;
    return MULT;
}
"==" {
    yylval.intval = EQop;
    return EQ_COMP;
}
"+=" {  
    yylval.intval = PLUS_ASSIGNop;
    return PLUS_ASSIGN;
}
"-=" {  
    yylval.intval = MIN_ASSIGNop;
    return MIN_ASSIGN;
}
"/=" {  
    yylval.intval = DIV_ASSIGNop;
    return DIV_ASSIGN;
}
"*=" {  
    yylval.intval = MULT_ASSIGNop;
    return MULT_ASSIGN;
}
"%=" {  
    yylval.intval = MOD_ASSIGNop;
    return MOD_ASSIGN;
}
"&=" {  
    yylval.intval = BITAND_ASSIGNop;
    return BITAND_ASSIGN;
}
"|=" {  
    yylval.intval = BITOR_ASSIGNop;
    return BITOR_ASSIGN;
}
"^=" {  
    yylval.intval = BITXOR_ASSIGNop;
    return BITXOR_ASSIGN;
}
"<<=" {  
    yylval.intval = LSHIFT_ASSIGNop;
    return LSHIFT_ASSIGN;
}
">>=" {  
    yylval.intval = RSHIFT_ASSIGNop;
    return RSHIFT_ASSIGN;
}
"!=" {
    yylval.intval = NOTEQop;
    return NOTEQ_COMP;
}
"<<" {
    yylval.intval = LSHIFTop;
    return L_SHIFT;
}
">>" {
    yylval.intval = RSHIFTop;
    return R_SHIFT;
}
">=" {
    yylval.intval = GTEQop;
    return GTEQ_COMP;
}
"<=" { 
    yylval.intval = LTEQop;
    return LTEQ_COMP;
}
">" {
    yylval.intval = GTop;
    return GT_COMP;
}
"<" {  
    yylval.intval = LTop;
    return LT_COMP;
}
"=" {  
    yylval.intval = ASSIGNop;
    return ASSIGN;
}
"%" {
    yylval.intval = MODop;
    return MOD;
}
"&&" {
    yylval.intval = LOGANDop;
    return LOGAND;
}
"&" {
    yylval.intval = BITANDop;
    return BITAND;
}
"||" {
    yylval.intval = LOGORop;
    return LOGOR;
}
"|" {
    yylval.intval = BITORop;
    return BITOR;
}
"^" {
    yylval.intval = BITXORop;
    return BITXOR;
}

Updating the token declarations in the Parser results in:

%token <intval> ASSIGN PLUS_ASSIGN MIN_ASSIGN MULT_ASSIGN DIV_ASSIGN MOD_ASSIGN LSHIFT_ASSIGN RSHIFT_ASSIGN BITAND_ASSIGN BITXOR_ASSIGN BITOR_ASSIGN

%token <intval> BITAND BITXOR BITOR LOGAND LOGOR
%token <intval> EQ_COMP NOTEQ_COMP
%token <intval> GT_COMP LT_COMP GTEQ_COMP LTEQ_COMP
%token <intval> R_SHIFT L_SHIFT
%token <intval> PLUS MINUS
%token <intval> MULT DIVIDE MOD
%token <intval> INCR

Then, the grammar for the expression type must be updated to account for these new operators. Note that now we can use $2 in the create_new_ast_expr_node function rather than an enum as the integer is passed from the Lexer:

expression:
term { $$ = $1; }
| variable ASSIGN expression { $$ = new_ast_expr_node($1, $2, $3); }
| variable PLUS_ASSIGN expression { $$ = new_ast_expr_node($1, $2, $3); }
| variable MIN_ASSIGN expression { $$ = new_ast_expr_node($1, $2, $3); }
| variable MULT_ASSIGN expression { $$ = new_ast_expr_node($1, $2, $3); }
| variable DIV_ASSIGN expression { $$ = new_ast_expr_node($1, $2, $3); }
| variable MOD_ASSIGN expression { $$ = new_ast_expr_node($1, $2, $3); }
| variable LSHIFT_ASSIGN expression { $$ = new_ast_expr_node($1, $2, $3); }
| variable RSHIFT_ASSIGN expression { $$ = new_ast_expr_node($1, $2, $3); }
| variable BITAND_ASSIGN expression { $$ = new_ast_expr_node($1, $2, $3); }
| variable BITXOR_ASSIGN expression { $$ = new_ast_expr_node($1, $2, $3); }
| variable BITOR_ASSIGN expression { $$ = new_ast_expr_node($1, $2, $3); }
| expression LOGOR expression { $$ = new_ast_expr_node($1, $2, $3); }
| expression LOGAND expression { $$ = new_ast_expr_node($1, $2, $3); }
| expression BITOR expression { $$ = new_ast_expr_node($1, $2, $3); }
| expression BITXOR expression { $$ = new_ast_expr_node($1, $2, $3); }
| expression BITAND expression { $$ = new_ast_expr_node($1, $2, $3); }
| expression EQ_COMP expression { $$ = new_ast_expr_node($1, $2, $3); }
| expression NOTEQ_COMP expression { $$ = new_ast_expr_node($1, $2, $3); }
| expression GT_COMP expression { $$ = new_ast_expr_node($1, $2, $3); }
| expression LT_COMP expression { $$ = new_ast_expr_node($1, $2, $3); }
| expression GTEQ_COMP expression { $$ = new_ast_expr_node($1, $2, $3); }
| expression LTEQ_COMP expression { $$ = new_ast_expr_node($1, $2, $3); }
| expression R_SHIFT expression { $$ = new_ast_expr_node($1, $2, $3); }
| expression L_SHIFT expression { $$ = new_ast_expr_node($1, $2, $3); }
| expression PLUS expression { $$ = new_ast_expr_node($1, $2, $3); }
| expression MINUS expression { $$ = new_ast_expr_node($1, $2, $3); }
| expression MULT expression { $$ = new_ast_expr_node($1, $2, $3); }
| expression DIVIDE expression { $$ = new_ast_expr_node($1, $2, $3); }
;

Now are Parser can handle more operators; however, there is no operator precedence. In C, operator precedence is determines what operators should be evaluated before others. For example, take the expression 1*2+3. The multiplication of 1 and 2 should occur first; however, without operator precedence, the compiler evaluates the addition first:

To add operator precedence in bison, the %left and %right tokens can be used. %left is used for left to right precedence (operators on the same precedence level occur left to right) and %right is the inverse of %left. Precedence levels are set up as follows (the lower they are, the earlier they will be evaluated):

%right ASSIGN PLUS_ASSIGN MIN_ASSIGN MULT_ASSIGN DIV_ASSIGN MOD_ASSIGN LSHIFT_ASSIGN RSHIFT_ASSIGN BITAND_ASSIGN BITXOR_ASSIGN BITOR_ASSIGN
%left LOGOR
%left LOGAND
%left BITOR
%left BITXOR 
%left BITAND 
%left EQ_COMP NOTEQ_COMP
%left GT_COMP LT_COMP GTEQ_COMP LTEQ_COMP
%left R_SHIFT L_SHIFT
%left PLUS MINUS
%left MULT DIVIDE MOD

Now running the same expression results in the multiplication occurring first:

There are still more operators to be added but for now a good base has been set and operator precedence has been introduced.