From cfd64648d9dd4d2b9741ed7d406fd15c82096700 Mon Sep 17 00:00:00 2001 From: Partho Bhattacharya Date: Fri, 7 Feb 2025 00:59:03 -0500 Subject: [PATCH 1/6] Added simple rules for literals based on documentation. Still have to test them and refine further --- lexicalStructure.lex | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/lexicalStructure.lex b/lexicalStructure.lex index c41b8ce..706baca 100644 --- a/lexicalStructure.lex +++ b/lexicalStructure.lex @@ -7,6 +7,17 @@ %} %% -//rules +/* rules */ +[0-9]+ {printf( "C_INTEGER: %s (%d)\n", yytext, atoi( yytext ) );} + +"null" {printf( "C_NULL: %s (%d)\n", yytext, atoi( yytext ) );} + +['][.]['] | [']\\[nt'\\]['] {printf( "C_CHARACTER: %s (%d)\n", yytext, atoi( yytext ) );} /*using double \ per documentation to show escaped chars*/ + +"true" {printf( "C_TRUE: %s (%d)\n", yytext, atoi( yytext ) );} + +"false" {printf( "C_FALSE: %s (%d)\n", yytext, atoi( yytext ) );} + +["][.]+["] {printf( "C_STRING: %s (%d)\n", yytext, atoi( yytext ) );} %% -//user code +/* user code */ From 145d3eeb85d874364198fe6af8c01bac2d646ff4 Mon Sep 17 00:00:00 2001 From: Partho Bhattacharya Date: Fri, 7 Feb 2025 14:31:10 -0500 Subject: [PATCH 2/6] updated lex file to have working code for all except for chars and strings. Also added the test files for just Int Testing and for testing for all literals (that is still failing for char and strings) --- lexicalStructure.lex | 36 +++++++++++++++++++++++++----------- simpleIntTest.txt | 4 ++++ simpleLiteralTest.a | 11 +++++++++++ 3 files changed, 40 insertions(+), 11 deletions(-) create mode 100644 simpleIntTest.txt create mode 100644 simpleLiteralTest.a diff --git a/lexicalStructure.lex b/lexicalStructure.lex index 706baca..de6d394 100644 --- a/lexicalStructure.lex +++ b/lexicalStructure.lex @@ -1,23 +1,37 @@ -/* Lexical Analysis with Flex (2.6.0) We used some of the code from this manual */ -/* so we placed the citation here. */ -/* definitions */ + /* Lexical Analysis with Flex (2.6.0) We used some of the code from this manual */ + /* so we placed the citation here. */ + /* definitions */ %option noyywrap - %{ - #include - %} +%{ +#include "typedefs.h" +%} +DIGIT [0-9] %% -/* rules */ -[0-9]+ {printf( "C_INTEGER: %s (%d)\n", yytext, atoi( yytext ) );} + /* rules */ +{DIGIT}+ {printf( "C_INTEGER: %s (%d)\n", yytext, atoi( yytext ) );} "null" {printf( "C_NULL: %s (%d)\n", yytext, atoi( yytext ) );} -['][.]['] | [']\\[nt'\\]['] {printf( "C_CHARACTER: %s (%d)\n", yytext, atoi( yytext ) );} /*using double \ per documentation to show escaped chars*/ +"'"[.|\n]"'" {printf( "C_CHARACTER: %s (%d)\n", yytext, atoi( yytext ) );} /*using double \ per documentation to show escaped chars*/ "true" {printf( "C_TRUE: %s (%d)\n", yytext, atoi( yytext ) );} "false" {printf( "C_FALSE: %s (%d)\n", yytext, atoi( yytext ) );} -["][.]+["] {printf( "C_STRING: %s (%d)\n", yytext, atoi( yytext ) );} +"\""[\^{}}\n]*"\"" {printf( "C_STRING: %s (%d)\n", yytext, atoi( yytext ) );} + +.|\n + %% -/* user code */ + /* user code */ + +int main( int argc, char **argv ) +{ + argc--, argv++; /* skip over program name */ + if ( argc > 0 ) + yyin = fopen( argv[0], "r" ); + else + yyin = stdin; + yylex(); +} diff --git a/simpleIntTest.txt b/simpleIntTest.txt new file mode 100644 index 0000000..507ec22 --- /dev/null +++ b/simpleIntTest.txt @@ -0,0 +1,4 @@ +45 +123 +8392 + diff --git a/simpleLiteralTest.a b/simpleLiteralTest.a new file mode 100644 index 0000000..d93c06a --- /dev/null +++ b/simpleLiteralTest.a @@ -0,0 +1,11 @@ +"this is a string" 721398 'g' '/n' +12893 "this is not a string +" +false +nullfalse +"nulltrue +null +'7' +true +'189 + From db66b040336455d7ebc6cf38b59bee119598facc Mon Sep 17 00:00:00 2001 From: Partho Bhattacharya Date: Fri, 7 Feb 2025 15:40:48 -0500 Subject: [PATCH 3/6] got some chars to identify and edited the simple literal test to include more examples -Task01 --- lexicalStructure.lex | 2 +- simpleLiteralTest.a | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/lexicalStructure.lex b/lexicalStructure.lex index de6d394..5027607 100644 --- a/lexicalStructure.lex +++ b/lexicalStructure.lex @@ -13,7 +13,7 @@ DIGIT [0-9] "null" {printf( "C_NULL: %s (%d)\n", yytext, atoi( yytext ) );} -"'"[.|\n]"'" {printf( "C_CHARACTER: %s (%d)\n", yytext, atoi( yytext ) );} /*using double \ per documentation to show escaped chars*/ +\'.\'|\'\\[nt]\' {printf( "C_CHARACTER: %s (%d)\n", yytext, atoi( yytext ) );} /*using double \ per documentation to show escaped chars*/ "true" {printf( "C_TRUE: %s (%d)\n", yytext, atoi( yytext ) );} diff --git a/simpleLiteralTest.a b/simpleLiteralTest.a index d93c06a..d33541e 100644 --- a/simpleLiteralTest.a +++ b/simpleLiteralTest.a @@ -8,4 +8,9 @@ null '7' true '189 +'/n' +'/t' +'"' +'/' + From 6c3951b6ab098ab3ec4a68b695302aeff3901f9a Mon Sep 17 00:00:00 2001 From: Partho Bhattacharya Date: Fri, 7 Feb 2025 18:59:45 -0500 Subject: [PATCH 4/6] character identifier seems to work. Still working on strings. Updated tests to include more. Task01 --- lexicalStructure.lex | 7 ++++--- simpleLiteralTest.a | 12 ++++++++++-- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/lexicalStructure.lex b/lexicalStructure.lex index 5027607..d2b3bcf 100644 --- a/lexicalStructure.lex +++ b/lexicalStructure.lex @@ -1,4 +1,3 @@ - /* Lexical Analysis with Flex (2.6.0) We used some of the code from this manual */ /* so we placed the citation here. */ /* definitions */ %option noyywrap @@ -7,19 +6,21 @@ %} DIGIT [0-9] +CHAR \\\\n|\\\\t|\\\'|[^'\\\n\t] +STRINGVAL CHAR | " " %% /* rules */ {DIGIT}+ {printf( "C_INTEGER: %s (%d)\n", yytext, atoi( yytext ) );} "null" {printf( "C_NULL: %s (%d)\n", yytext, atoi( yytext ) );} -\'.\'|\'\\[nt]\' {printf( "C_CHARACTER: %s (%d)\n", yytext, atoi( yytext ) );} /*using double \ per documentation to show escaped chars*/ +'{CHAR}' {printf( "C_CHARACTER: %s (%d)\n", yytext, atoi( yytext ) );} /*using double \ per documentation to show escaped chars*/ "true" {printf( "C_TRUE: %s (%d)\n", yytext, atoi( yytext ) );} "false" {printf( "C_FALSE: %s (%d)\n", yytext, atoi( yytext ) );} -"\""[\^{}}\n]*"\"" {printf( "C_STRING: %s (%d)\n", yytext, atoi( yytext ) );} +"{STRINGVAL}+" {printf( "C_STRING: %s (%d)\n", yytext, atoi( yytext ) );} .|\n diff --git a/simpleLiteralTest.a b/simpleLiteralTest.a index d33541e..e74e4ee 100644 --- a/simpleLiteralTest.a +++ b/simpleLiteralTest.a @@ -1,16 +1,24 @@ "this is a string" 721398 'g' '/n' 12893 "this is not a string " +''' +'\' false +''' nullfalse "nulltrue null '7' true '189 -'/n' -'/t' +'\t' '"' '/' +'\n' +'\'' +'\t' +'\\' +'\' +''' From f4a86f0de7751949e401cf8246d245a64da3cb85 Mon Sep 17 00:00:00 2001 From: Partho Bhattacharya Date: Mon, 10 Feb 2025 13:12:46 -0500 Subject: [PATCH 5/6] finished string and char updates and testing updates task 03 --- lexicalStructure.lex | 7 ++++--- simpleLiteralTest.a | 19 +++++++++++++++++-- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/lexicalStructure.lex b/lexicalStructure.lex index d2b3bcf..f5315a1 100644 --- a/lexicalStructure.lex +++ b/lexicalStructure.lex @@ -6,8 +6,9 @@ %} DIGIT [0-9] -CHAR \\\\n|\\\\t|\\\'|[^'\\\n\t] -STRINGVAL CHAR | " " +CHAR \\n|\\t|\\'|[^'\n\t\\] +SCHAR \\n|\\t|\\\"|[^\"\n\\] + %% /* rules */ {DIGIT}+ {printf( "C_INTEGER: %s (%d)\n", yytext, atoi( yytext ) );} @@ -20,7 +21,7 @@ STRINGVAL CHAR | " " "false" {printf( "C_FALSE: %s (%d)\n", yytext, atoi( yytext ) );} -"{STRINGVAL}+" {printf( "C_STRING: %s (%d)\n", yytext, atoi( yytext ) );} +\"{SCHAR}+\" {printf( "C_STRING: %s (%d)\n", yytext, atoi( yytext ) );} .|\n diff --git a/simpleLiteralTest.a b/simpleLiteralTest.a index e74e4ee..e85644b 100644 --- a/simpleLiteralTest.a +++ b/simpleLiteralTest.a @@ -18,7 +18,22 @@ true '\'' '\t' '\\' - +'n' '\' - +'fdsf' +' +' +' ' ''' +"STRINGwithnotSPaces" +' ' +'\ ' +"J" +"" +" " +\"\" +"{SCHAR}" +"SCHAR" +"[SCHAR]" +"FINAL: I'd think this is a legal \"string\" that contains \n \t several escaped characters, isn't it?" +"I'd think this is a legal \"string\" that contains several \\n \t escaped characters, isn't it?" From 375743441746a7527f4edd8fbb0d5091a7a87da2 Mon Sep 17 00:00:00 2001 From: Partho Bhattacharya Date: Mon, 10 Feb 2025 23:41:37 -0500 Subject: [PATCH 6/6] added comment functionality for lexer, expanded tests, and updated to identify empty strings --- lexicalStructure.lex | 13 ++++++++++--- simpleLiteralTest.a | 11 +++++++++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/lexicalStructure.lex b/lexicalStructure.lex index f5315a1..1e2b52c 100644 --- a/lexicalStructure.lex +++ b/lexicalStructure.lex @@ -7,8 +7,13 @@ DIGIT [0-9] CHAR \\n|\\t|\\'|[^'\n\t\\] -SCHAR \\n|\\t|\\\"|[^\"\n\\] - + /* char can be a newline, tab, an escaped quote, or anything but a single quote, an actual line break, an actual tab, or a backslash by itself (to prevent confusion from escaped quote */ +SCHAR \\n|\\t|\\\"|[^\"\n\\] + /*similar to above, a string Char (SCHAR) is the same as a CHAR except we cannot have double quotes instead of single quotes. Double quotes need to be escaped in Flex unlike single quotes based on documentation */ +STARTCOM \(\* +ENDCOM \*\) +COMMENTCHAR [^\*]|\*[^\)] + /*Making the contents of a comment anything that is either not a * or not a * followed by ) to terminate comments at the first ENDCOM */ %% /* rules */ {DIGIT}+ {printf( "C_INTEGER: %s (%d)\n", yytext, atoi( yytext ) );} @@ -21,7 +26,9 @@ SCHAR \\n|\\t|\\\"|[^\"\n\\] "false" {printf( "C_FALSE: %s (%d)\n", yytext, atoi( yytext ) );} -\"{SCHAR}+\" {printf( "C_STRING: %s (%d)\n", yytext, atoi( yytext ) );} +\"{SCHAR}*\" {printf( "C_STRING: %s (%d)\n", yytext, atoi( yytext ) );} + +{STARTCOM}{COMMENTCHAR}*{ENDCOM} {printf( "COMMENT: %s (%d)\n", yytext, atoi( yytext ) );} .|\n diff --git a/simpleLiteralTest.a b/simpleLiteralTest.a index e85644b..f97cf52 100644 --- a/simpleLiteralTest.a +++ b/simpleLiteralTest.a @@ -1,9 +1,15 @@ -"this is a string" 721398 'g' '/n' -12893 "this is not a string +"this is a string" 721398 'g' '/n' (* should print 3 tokens before this *) +' +' +12893 "this is not a string (*one valid token before this*) +(* spacey comment here +over multiple lines +will it work? *) " ''' '\' false +(**) ''' nullfalse "nulltrue @@ -21,6 +27,7 @@ true 'n' '\' 'fdsf' +(*/jnewjno2893u86^ Lots of random characters /n /t '") *) ' ' ' '