# Patch summary (header text only; everything from the "diff --git" line on is unchanged).
#
# Files touched by this patch:
#   * lexicalStructure.lex        - modified (single hunk, -3,78 +3,70)
#   * tests/test_comments.alpha   - new file: nine comment samples, one of which
#                                   spans two lines
#   * tests/test_otherpunc.alpha  - new file: samples for ; : , -> ->> -->
#   * tests/test_types.alpha      - deleted
#   * tests/test_variables.alpha  - new file: identifier samples
#
# What the lexicalStructure.lex hunk changes:
#   * Moves the definition of line_number / column_number inside the %{ ... %}
#     block, after the #include of typedefs.h.
#   * Adds the named patterns COM and ID; removes STARTCOM, ENDCOM and COMMENTCHAR.
#   * Constant, string and comment rules now return a token (C_INTEGER, C_NULL,
#     C_CHARACTER, C_TRUE, C_FALSE, C_STRING, COMMENT) instead of printf-ing.
#   * Adds the ';' that was missing after the return statement in the type and
#     keyword rules.
#   * Adds rules for ";" ":" "," "->" (SEMI_COLON, COLON, COMMA, ARROW) and an
#     {ID} rule placed after all keyword rules.
#   * Moves the \n and . rules to the end of the rule section and wraps their
#     actions in braces; the old \n action had no ';' between its two statements.
#
# NOTE(review): the new comment rule is "(*"{COM}"*)" with
#   COM = ([^*]|\*+[^)*])*. It looks like a comment that closes with more than
#   one asterisk, e.g. (***) or (* x **), cannot match, because COM cannot
#   consume a '*' that is followed by '*' or ')' and the closer demands exactly
#   one '*'. The usual flex idiom ends the pattern with \*+")" - confirm and
#   consider adding such a case to tests/test_comments.alpha.
# NOTE(review): as far as this hunk shows, line_number and column_number are
#   only updated by the final \n and . rules. Rules that return a token do not
#   advance column_number, and a comment containing a line break (as in
#   tests/test_comments.alpha) would not advance line_number - confirm whether
#   position tracking is handled elsewhere.
# NOTE(review): the diff text below appears to have had its line breaks
#   collapsed into spaces; presumably the original layout must be restored
#   before the hunk counts can be checked or the patch applied - verify against
#   the source commit.
#
diff --git a/lexicalStructure.lex b/lexicalStructure.lex index e658e4d..14b8616 100644 --- a/lexicalStructure.lex +++ b/lexicalStructure.lex @@ -3,78 +3,70 @@ /* definitions */ %option noyywrap - -%{ -#include "typedefs.h" +%{ +#include "typedefs.h" +int line_number = 1, column_number = 1; %} - int line_number = 1, column_number = 1; +COM ([^*]|\*+[^)*])* +ID [A-Za-z_][0-9A-Za-z_]* DIGIT [0-9] CHAR \\n|\\t|\\'|[^'\n\t\\] /* char can be a newline, tab, an escaped quote, or anything but a single quote, an actual line break, an actual tab, or a backslash by itself (to prevent confusion from escaped quote */ SCHAR \\n|\\t|\\\"|[^\"\n\\] - /*similar to above, a string Char (SCHAR) is the same as a CHAR except we cannot have double quotes instead of single quotes. Double quotes need to be escaped in Flex unlike single quotes based on documentation */ -STARTCOM \(\* -ENDCOM \*\) -COMMENTCHAR [^\*]|\*[^\)] - /*Making the contents of a comment anything that is either not a * or not a * followed by ) to terminate comments at the first ENDCOM */ -%% - - -\n line_number++ column_number = 1; -. 
column_number++; - -"integer" {return T_INTEGER} -"address" {return T_ADDRESS} -"Boolean" {return T_BOOLEAN} -"character" {return T_CHARACTER} - - - /* rules */ -{DIGIT}+ {printf( "C_INTEGER: %s (%d)\n", yytext, atoi( yytext ) );} - -"null" {printf( "C_NULL: %s (%d)\n", yytext, atoi( yytext ) );} - -"while" {return WHILE} -"if" {return IF} -"then" {return THEN} -"else" {return ELSE} -"type" {return TYPE} -"function" {return FUNCTION} -"return" {return RETURN} -"external" {return EXTERNAL} -"as" {return AS} - -'{CHAR}' {printf( "C_CHARACTER: %s (%d)\n", yytext, atoi( yytext ) );} /*using double \ per documentation to show escaped chars*/ - -"true" {printf( "C_TRUE: %s (%d)\n", yytext, atoi( yytext ) );} - - -"false" {printf( "C_FALSE: %s (%d)\n", yytext, atoi( yytext ) );} - - /* OPERATORS */ - -"+" {return ADD;} -"-" {return SUB_OR_NEG;} -"*" {return MUL;} -"/" {return DIV;} -"%" {return REM;} -"<" {return LESS_THAN;} -"=" {return EQUAL_TO;} -":=" {return ASSIGN;} -"!" {return NOT;} -"&" {return AND;} -"|" {return OR;} -"." {return DOT;} -"reserve" {return RESERVE;} -"release" {return RELEASE;} - - -\"{SCHAR}*\" {printf( "C_STRING: %s (%d)\n", yytext, atoi( yytext ) );} - -\(\*{COMMENTCHAR}*\*\) {printf( "COMMENT: %s (%d)\n", yytext, atoi( yytext ) );} - -.|\n - + /* similar to above, a string Char (SCHAR) is the same as a CHAR except we cannot have double quotes instead of single quotes. 
Double quotes need to be escaped in Flex unlike single quotes based on documentation */ %% + +"integer" {return T_INTEGER;} +"address" {return T_ADDRESS;} +"Boolean" {return T_BOOLEAN;} +"character" {return T_CHARACTER;} + +{DIGIT}+ {return C_INTEGER;} +"null" {return C_NULL;} + +"while" {return WHILE;} +"if" {return IF;} +"then" {return THEN;} +"else" {return ELSE;} +"type" {return TYPE;} +"function" {return FUNCTION;} +"return" {return RETURN;} +"external" {return EXTERNAL;} +"as" {return AS;} + +'{CHAR}' {return C_CHARACTER;} +"true" {return C_TRUE;} +"false" {return C_FALSE;} + +"+" {return ADD;} +"-" {return SUB_OR_NEG;} +"*" {return MUL;} +"/" {return DIV;} +"%" {return REM;} +"<" {return LESS_THAN;} +"=" {return EQUAL_TO;} +":=" {return ASSIGN;} +"!" {return NOT;} +"&" {return AND;} +"|" {return OR;} +"." {return DOT;} + +";" {return SEMI_COLON;} +":" {return COLON;} +"," {return COMMA;} +"->" {return ARROW;} + +"reserve" {return RESERVE;} +"release" {return RELEASE;} + +\"{SCHAR}*\" {return C_STRING;} +"(*"{COM}"*)" {return COMMENT;} + +{ID} {return ID;} + +\n {line_number++; column_number = 1;} +. {column_number++;} + +%% \ No newline at end of file diff --git a/tests/test_comments.alpha b/tests/test_comments.alpha new file mode 100644 index 0000000..1111428 --- /dev/null +++ b/tests/test_comments.alpha @@ -0,0 +1,9 @@ +(* hello *) +(* hello *) +(* I'd think this is a legal "string" that contains several \n \t +escaped characters, isn't it? 
*) +(* \ *) +(* *) +(*{COMMENT}+ *) +(* * *) +(* (hello) *) \ No newline at end of file diff --git a/tests/test_otherpunc.alpha b/tests/test_otherpunc.alpha new file mode 100644 index 0000000..bd1f2de --- /dev/null +++ b/tests/test_otherpunc.alpha @@ -0,0 +1,6 @@ +; +: +, +-> +->> +--> \ No newline at end of file diff --git a/tests/test_types.alpha b/tests/test_types.alpha deleted file mode 100644 index 4fde8cd..0000000 --- a/tests/test_types.alpha +++ /dev/null @@ -1,10 +0,0 @@ -integer -Integer -address -Address -Boolean -boolean -character -Character -string -String diff --git a/tests/test_variables.alpha b/tests/test_variables.alpha new file mode 100644 index 0000000..26db6c0 --- /dev/null +++ b/tests/test_variables.alpha @@ -0,0 +1,10 @@ +valid1 +Valid2 +_valid3 +_valid_name_4 +VALID +0Invalid +1invalid +"invalid +invalid= +String \ No newline at end of file