Merge branch 'Dev' into Sprint1-TokenizeID_MainFunction-FE-t#04

2025-02-12 12:57:14 -05:00
parent 5602481b52 a9d4f8a3ac
commit b6c2d33c42
11 changed files with 2054 additions and 29 deletions
--- a/21
+++ b/21
@@ -7,25 +7,10 @@ lexicalStructure:
 	$(FLEX) $(LEX)
 	$(CC) lex.yy.c -o $(EXE)

-test1:
-	./$(EXE) ./tests/test_constants_literals.alpha
-
-test2:
-	./$(EXE) ./tests/test_keywards.alpha
-
-test3:
-	./$(EXE) ./tests/test_types.alpha
-
-test4:
-	./$(EXE) ./tests/test_variables.alpha
-
-test5:
-	./$(EXE) ./tests/test_otherpunc.alpha
-
-test6:
-	./$(EXE) ./tests/test_comments.alpha
+test_operators:
+	./$(EXE) ./tests/test_operators.alpha

 clean:
 	rm -f *.o
 	rm -f lex.yy.c
-	rm -f $(EXE)
+	rm -f $(EXE)
--- a/lex.yy.c
+++ b/lex.yy.c
--- a/lexicalStructure.lex
+++ b/lexicalStructure.lex
@@ -1,32 +1,72 @@
-/* Lexical Analysis with Flex (2.6.0) We used some of the code from this manual */
+/* Lexical Analysis with Flex (1.6.0) We used some of the code from this manual */
 /*                                    so we placed the citation here. */
 /* definitions */
+
 %option noyywrap
 %{
 #include "typedefs.h"
+int line_number = 1, column_number = 1;
 %}

 COM ([^*]|\*+[^)*])*
 ID [A-Za-z_][0-9A-Za-z_]*
+DIGIT [0-9]
+CHAR \\n|\\t|\\'|[^'\n\t\\]
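+	/* CHAR matches an escaped newline (\n), an escaped tab (\t), an escaped single quote (\'), or any single character other than a single quote, a literal line break, a literal tab, or a backslash by itself (a lone backslash is excluded so it cannot be confused with an escaped quote). */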
+SCHAR \\n|\\t|\\\"|[^\"\n\\]  
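+	/* Similar to CHAR above: a string character (SCHAR) matches an escaped newline, an escaped tab, an escaped double quote, or any single character other than a double quote, a literal line break, or a backslash by itself. Unlike CHAR, a literal tab is accepted. Based on the Flex documentation, the double quote must be escaped in the pattern, unlike the single quote. */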

 %%

-"(*"{COM}"*)"   {return COMMENT;}
+"integer"       {return T_INTEGER;}
+"address"       {return T_ADDRESS;}
+"Boolean"       {return T_BOOLEAN;}
+"character"     {return T_CHARACTER;}
+
+{DIGIT}+        {return C_INTEGER;}
+"null"          {return C_NULL;}
+
+"while"         {return WHILE;}
+"if"            {return IF;}
+"then"          {return THEN;}
+"else"          {return ELSE;}
+"type"          {return TYPE;}
+"function"      {return FUNCTION;}
+"return"        {return RETURN;}
+"external"      {return EXTERNAL;}
+"as"            {return AS;}
+
+'{CHAR}'        {return C_CHARACTER;}
+"true"          {return C_TRUE;}
+"false"         {return C_FALSE;}
+
+"+"		        {return ADD;}
+"-"		        {return SUB_OR_NEG;}
+"*"		        {return  MUL;}
+"/"		        {return DIV;}
+"%"		        {return REM;}
+"<"		        {return LESS_THAN;}
+"="		        {return EQUAL_TO;}
+":="		    {return ASSIGN;}
+"!"		        {return NOT;}
+"&"		        {return AND;}
+"|"		        {return OR;}
+"."		        {return DOT;}

 ";"             {return SEMI_COLON;}
 ":"             {return COLON;}
 ","             {return COMMA;}
 "->"            {return ARROW;}

+"reserve"	    {return RESERVE;}
+"release" 	    {return RELEASE;}
+
+\"{SCHAR}*\"    {return C_STRING;}
+"(*"{COM}"*)"   {return COMMENT;}
+
 {ID}            {return ID;}

-%%
+\n              {line_number++; column_number = 1;}
+.               {column_number++;}

-int main(int argc, char *argv[]){
-        argc--, argv++;
-        if ( argc > 0 )
-                yyin = fopen( argv[0], "r" );
-        else
-                yyin = stdin;
-        yylex();
-}
+%%
--- a/runner.c
+++ b/runner.c
@@ -0,0 +1,108 @@
+#include <stdio.h>
+#include <string.h>
+#include "runner.h"
+
+/*
+ * Token-stream driver: runs the flex scanner over the input program and,
+ * when invoked as `<exe> -tok <name>.alpha`, writes one line per token to
+ * `<name>.tok` in the form: line column token-id "lexeme".
+ *
+ * Input selection: in -tok mode the program is argv[2]; with a single
+ * argument it is argv[1]; otherwise yyin is left untouched.
+ *
+ * Returns 0 on success and -1 if the arguments are rejected by is_tok() or
+ * a file cannot be opened.
+ */
+int main(int argc, char *argv[]) {
+    const char *input_path = NULL;
+    FILE *input = NULL;
+    FILE *output = NULL;
+    char *check_input;
+    int tok_mode;
+    int token;
+
+    /* check_input is INVALID_ARG, DIFF_ARG, or the generated .tok file name. */
+    check_input = is_tok(argc, argv);
+    if (strcmp(check_input, INVALID_ARG) == 0) {
+        return -1;
+    }
+    /* Compare by content rather than by pointer: identical string literals
+     * are not guaranteed to share an address. */
+    tok_mode = strcmp(check_input, DIFF_ARG) != 0;
+
+    if (tok_mode) {
+        input_path = argv[2];
+    } else if (argc == 2) {
+        input_path = argv[1];
+    }
+
+    if (input_path != NULL) {
+        input = fopen(input_path, "r");
+        if (input == NULL) {
+            perror(input_path);
+            return -1;
+        }
+        yyin = input;
+    }
+
+    /* Create the output file only when a token file was actually requested. */
+    if (tok_mode) {
+        output = fopen(check_input, "w");
+        if (output == NULL) {
+            perror(check_input);
+            if (input != NULL) {
+                fclose(input);
+            }
+            return -1;
+        }
+    }
+
+    while (0 != (token = yylex())) {
+        if (output != NULL) {
+            fprintf(output, "%d %d %3d \"%s\"\n", line_number, column_number, token, yytext);
+        }
+    }
+
+    if (output != NULL) {
+        fclose(output);
+    }
+    if (input != NULL) {
+        fclose(input);
+    }
+    return 0;
+}
+
+/*
+ * Classifies the command line.
+ *
+ * Returns the output file name (input name with ".alpha" replaced by ".tok")
+ * when called as `-tok <name>.alpha`, INVALID_ARG when -tok is given a file
+ * that does not end in ".alpha" (or whose name is too long), and DIFF_ARG for
+ * any other argument list. The returned name lives in a static buffer that is
+ * overwritten by the next call; callers must not free it.
+ */
+char *is_tok(int argc, char *argv[]) {
+    static const char alpha_ext[] = ".alpha";
+    static const char tok_ext[] = ".tok";
+    static char tok_name[FILENAME_MAX];
+
+    if (argc == 3 && strcmp("-tok", argv[1]) == 0) {
+        const char *input_prog = argv[2];
+        size_t file_len = strlen(input_prog);
+        size_t ext_len = sizeof alpha_ext - 1;
+        size_t base_len;
+
+        /* check that input program is a .alpha file */
+        if (file_len < ext_len || strcmp(alpha_ext, input_prog + file_len - ext_len) != 0) {
+            return INVALID_ARG;
+        }
+        base_len = file_len - ext_len;
+        if (base_len + sizeof tok_ext > sizeof tok_name) {
+            return INVALID_ARG;
+        }
+        memcpy(tok_name, input_prog, base_len);               /* copy name of prog before .alpha */
+        memcpy(tok_name + base_len, tok_ext, sizeof tok_ext); /* add ".tok" and its NUL terminator */
+        return tok_name;
+    }
+    return DIFF_ARG;
+}
--- a/runner.h
+++ b/runner.h
@@ -0,0 +1,24 @@
+#ifndef RUNNER_H
+#define RUNNER_H
+
+/* Shared declarations for the token-stream driver (runner.c). */
+
+/* Length of the ".alpha" source-file suffix. */
+#define ALPHA_OFFSET 6
+/* Length of the ".tok" output suffix, not counting the NUL terminator. */
+#define TOK_LEN 4
+/* Sentinel strings returned by is_tok(). */
+#define INVALID_ARG "invalid"
+#define DIFF_ARG "diff"
+
+#include <string.h>
+#include <stdio.h>
+#include "lex.yy.c"
+
+extern int line_number, column_number;
+extern char *yytext;
+
+int main(int argc, char *argv[]);
+char *is_tok(int argc, char *argv[]);
+
+#endif /* RUNNER_H */
--- a/BIN
+++ b/BIN
--- a/tests/generalTokenTest.alpha
+++ b/tests/generalTokenTest.alpha
@@ -0,0 +1,24 @@
+This is a test
+9combined 7okens
+12345
+893247892
+combined'DueToUnknownChar _validtoken __validtoken1 _valid_token2 validToken3_
+true false
+null while !wrong if when
+else	type	function
+return  external        as
+string _NOte_that_was_not_reserved
+([)]{}:;,->"\
+-*/%
+<=
+:=
+"This is not a valid
+String"
+"This is a valid String"
+!|
+..
+(* this is a comment *)
+(*Not a comment
+$^&
+>
+
--- a/tests/simpleIntTest.alpha
+++ b/tests/simpleIntTest.alpha
@@ -0,0 +1,4 @@
+45
+123
+8392
+
--- a/tests/simpleLiteralAndCommentsTest.alpha
+++ b/tests/simpleLiteralAndCommentsTest.alpha
@@ -0,0 +1,47 @@
+"this is a string" 721398 'g' '/n' (* should print 3 tokens before this *)
+'
+' 
+12893 "this is not a string (*one valid token before this*)
+(* spacey comment here     
+over multiple lines
+will it work? *)
+"
+'''
+'\'
+false
+(**)
+'''
+nullfalse
+"nulltrue
+null
+'7'
+true
+'189
+'\t'
+'"'
+'/'
+'\n'
+'\''
+'\t'
+'\\'
+'n'
+'\'
+'fdsf'
+(*/jnewjno2893u86^ 	Lots of random characters /n /t '") *)
+'
+'
+'	'
+'''
+"STRINGwithnotSPaces"
+' '
+'\ '
+"J"
+""
+" "
+\"\"
+"{SCHAR}"
+"SCHAR"
+"[SCHAR]"
+"FINAL: I'd think this is a legal \"string\" that contains \n \t several escaped characters, isn't it?"
+"I'd think this is a legal \"string\" that contains several \\n \t escaped characters, isn't it?"
+nullLike
--- a/tests/test_keywords.alpha
+++ b/tests/test_keywords.alpha
@@ -0,0 +1,29 @@
+while
+While
+whiLe
+if
+IF
+If
+iF
+then
+Then
+theN
+else
+eLse
+elSe
+Else
+type
+Type
+tyPe
+function
+Function
+functioN
+return
+Return
+returN
+external
+External
+exteRnal
+as
+As
+aS
--- a/tests/test_operators.alpha
+++ b/tests/test_operators.alpha
@@ -0,0 +1,23 @@
+
+-
+*
+/
+\
+%
+<
+>
+=
+:=
+=:
+:
+=
+!
+&
+|
+.
+relEASE
+release
+RELEASE
+reserve
+RESERVE
+reSERVe