I have written some Flex and Bison code for parsing a file.
The following is a simplified example of the input file as well as the code used for parsing.
There are a few things I am not satisfied with:
- The way keywords are recognized (Marked with [1] in tokenizer.lex), because the rules also recognize the following whitespaces, which are not part of the keyword itself.
- The part after the colon (:) is quite unflexible. If there is a whitespace at the beginning, it is always recognized as
STRING.
There has been a nicer solution before, but that one couldn't handle spaces in the right hand side string.
I am mostly interested in the issues I mentioned above but I'm open to any suggestions, as this is a completely new topic to me.
example.txt
File :TheFileName.txt
Value :19
Date :17.08.16
Description :This is a description
grammar.y
%{
#include <stdio.h>
int yylex();
int yyparse();
FILE *yyin;
extern int line_num;
void yyerror(const char *s);
%}
%union {
int ival;
float fval;
char *sval;
char cval;
}
%token NEWLINE
%token PARAM_FILE
%token PARAM_DESCRIPTION
%token PARAM_DATE
%token PARAM_VALUE
%token <ival> INTEGER
%token <fval> FLOAT
%token <sval> STRING
%token <sval> DATE
%%
File: AssignList;
AssignList: Assign
| Assign NEWLINES
| Assign NEWLINES AssignList
;
Assign: PARAM_FILE ':' STRING {printf("Found 'File': [%s]\n", $3);}
| PARAM_DESCRIPTION ':' STRING {printf("Found 'Description': [%s]\n", $3);}
| PARAM_DATE ':' DATE {printf("Found 'Date': [%s]\n", $3);}
| PARAM_VALUE ':' INTEGER {printf("Found 'Value': [%d]\n", $3);}
;
NEWLINES: NEWLINES NEWLINE
| NEWLINE
;
%%
int main(int argc, char *argv[]) {
yydebug = 0;
do {
yyparse();
} while (!feof(yyin));
return 0;
}
void yyerror(const char *s) {
printf("Error parsing file! Error on line %d, Message: %s\n", line_num, s);
}
tokenizer.lex
%option noyywrap
%{
#include "grammar.tab.h"
int line_num = 1;
%}
char [^:\n]
digit [0-9]
digit1 [1-9]
integer {digit1}{digit}*
float (integer|0)("."|,){digit}+
date {digit}{2}"."{digit}{2}"."{digit}{2}
string {char}+
%%
/* [1] I don't like this solution */
File[ \t]* { return PARAM_FILE; }
Description[ \t]* { return PARAM_DESCRIPTION; }
Date[ \t]* { return PARAM_DATE; }
Value[ \t]* { return PARAM_VALUE; }
: { return ':'; }
{integer} { yylval.ival = atoi(yytext); return INTEGER; }
{float} { yylval.fval = atof(yytext); return FLOAT; }
{date} { yylval.sval = strdup(yytext); return DATE; }
[ \t] ;
\n { line_num++; return NEWLINE; } /* count the line numbers */
{string} { yylval.sval = strdup(yytext); return STRING; }
%%
/* Empty 3rd section */
Makefile
grammar.tab.c: grammar.y
bison -d grammar.y
grammar.tab.h: grammar.y
bison -d grammar.y
lex.yy.c: tokenizer.lex
flex.exe tokenizer.lex
parser.exe: lex.yy.c grammar.tab.c grammar.tab.h
gcc -DYYDEBUG=1 lex.yy.c grammar.tab.c -o parser.exe
run: parser.exe
type example.txt | parser.exe
output of type example.txt | parser.exe
Found 'File': [TheFileName.txt]
Found 'Value': [19]
Found 'Date': [17.08.16]
Found 'Description': [This is a description]