Files
compiler-the-translators/src/grammar.y
2025-04-04 15:42:50 -04:00

730 lines
22 KiB
Plaintext
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* Syntax Analyzer with Bison (3.8.2) */
/* The Translators - Spring 2025 */
// ----- THIS FILE MUST BE FORMATTED CORRECTLY FOR READABILITY ----- //
// ✏️ FORMATTING RULES:
// 1⃣ Use 4 spaces for indentation.
// 2⃣ Grammar rules (terminals and nonterminals) should always be on their own line.
// 3⃣ Grammar rules and C-blocks should always begin 8 spaces in.
// 4⃣ Rule end-markers (;, |) should always be 4 spaces in.
// 5⃣ C-blocks should always be clearly defined and follow clang formatting rules.
// 6⃣ 1-line if/for/while statements must be wrapped in curly braces.
// 7⃣ Comments should always be above rules
// 8⃣ DO NOT USE TABS. EVER.
// Please ask Scarlett if you are unsure of how to format something. Thanks! 😀
%{
// The symbol table source is included directly. The helpers used by the
// semantic actions below (printdebug, CreateEntry, look_up, ...) are not
// declared in this file, so they presumably come from this include.
#include "../src/symbol_table.c"
// Error callback invoked by the generated parser; defined in the epilogue.
void yyerror(const char *err);
// Not referenced by any rule in this file. NOTE(review): presumably shared with the lexer - confirm.
int token_tracker;
// Holds the entry created for the most recent record definition.
TableNode * tn;
%}
// Enable location tracking so actions can use @n.first_line / @n.first_column.
%locations
// Semantic values are either an integer or a C string.
%union {
int integ;
char * words;
}
// Nonterminals that carry a typed semantic value.
%type <integ> idlist
%type <words> assignable
%type <words> expression
%type <words> constant
%type <words> id_or_types
%type <words> types
// Tokens are given explicit numeric codes.
// NOTE(review): presumably these must match the codes the lexer returns - confirm.
// 1xx: identifiers
%token <words> ID 101
// 2xx: primitive type names
%token <words> T_INTEGER 201
%token <words> T_ADDRESS 202
%token <words> T_BOOLEAN 203
%token <words> T_CHARACTER 204
%token <words> T_STRING 205
// 3xx: constants
%token <integ> C_INTEGER 301
%token <words> C_NULL 302
%token <words> C_CHARACTER 303
%token <words> C_STRING 304
%token <words> C_TRUE 305
%token <words> C_FALSE 306
// 4xx: keywords
%token WHILE 401
%token IF 402
%token THEN 403
%token ELSE 404
%token TYPE 405
%token FUNCTION 406
%token RETURN 407
%token EXTERNAL 408
%token AS 409
// 5xx: punctuation
%token L_PAREN 501
%token R_PAREN 502
%token L_BRACKET 503
%token R_BRACKET 504
%token L_BRACE 505
%token R_BRACE 506
%token SEMI_COLON 507
%token COLON 508
%token COMMA 509
%token ARROW 510
// 6xx: operators (listed here in precedence groups, not numeric order)
%token MUL 603
%token DIV 604
%token REM 605
%token ADD 601
%token LESS_THAN 606
%token EQUAL_TO 607
%token AND 610
%token OR 611
%token ASSIGN 608
%token SUB_OR_NEG 602
%token NOT 609
%token DOT 612
%token RESERVE 613
%token RELEASE 614
// 7xx: comments
%token COMMENT 700
// Operator precedence, lowest first: each later line binds tighter than the
// ones above it. UMINUS is not a real token; it only names the precedence
// level that the unary-minus rule selects with %prec.
%left ASSIGN
%left OR
%left AND
%left EQUAL_TO
%left LESS_THAN
%left ADD SUB_OR_NEG
%left MUL DIV REM
%precedence NOT
%precedence UMINUS
%precedence DOT
%precedence RESERVE RELEASE
%%
// First rule of the grammar, hence the start symbol: a whole input is one
// list of prototypes and definitions.
program:
        prototype_or_definition_list
    ;
// One or more prototypes and definitions, in any order (right-recursive).
prototype_or_definition_list:
        prototype prototype_or_definition_list
    |   definition prototype_or_definition_list
    |   prototype
    |   definition
    ;
// External function prototype; no semantic action is attached to it.
prototype:
        L_PAREN EXTERNAL R_PAREN FUNCTION ID COLON ID
    ;
// Top-level definitions: record types, array types, function declarations,
// function types, and function bodies (with or without the "as" keyword).
definition:
        // Record type: the dblock that follows declares the fields.
        TYPE ID COLON
        {
            printdebug("Currently see a record definition for %s", $<words>2);
            // Open the scope that will hold the record's fields before
            // registering the record. The assignment to cur used to be nested
            // inside the CreateEntry argument list next to getAncestor(cur),
            // which left the read and the write of cur unsequenced.
            cur = CreateScope(cur, 0, 0);
            tn = CreateEntry(getAncestor(cur), recprime, $2, CreateRecordInfo(0, cur));
            if (table_lookup(getAncestor(cur), $2) == undefined) {
                printdebug("rec not found");
            }
        }
        dblock
        {
            // Copy the field scope's size onto the record entry, then leave the scope.
            setRecSize(table_lookup(getParent(cur), $2), getRecSize(cur));
            cur = getParent(cur);
        }
    // Array type: the integer constant is the number of dimensions.
    |   TYPE ID COLON C_INTEGER ARROW id_or_types
        {
            printdebug("Currently see a array definition of name %s,storing type %s, of dimensions %d", $2, $6, $4);
            CreateEntry(cur, arrayprim, $2, CreateArrayInfo($4, look_up(cur, $6)));
            printdebug("%sID: %s, dimensions: %d, typeOfArray: %s", COLOR_GREEN, $2, $4, $6);
        }
    |   function_declaration
    // Function type: parameter type ARROW return type.
    |   TYPE ID COLON id_or_types ARROW id_or_types
        {
            printdebug("Currently see a function type definition of name %s,parameter type %s, of return type %s", $2, $4, $6);
            CreateEntry(cur, funtypeprime, $2, CreateFunctionTypeInfo(table_lookup(cur, $4), table_lookup(cur, $6)));
        }
    // Function body taking a single named parameter (no "as").
    |   ID
        {
            TableNode *node = table_lookup(getAncestor(cur), $<words>1);
            if (node == undefined) {
                printdebug("function not declared at line %d, column %d", @1.first_line, @1.first_column);
            } else if (getAdInfoType(node) != TYPE_FUNCTION_DECLARATION) {
                printdebug("function not declared at line %d, column %d", @1.first_line, @1.first_column);
            } else {
                setStartLine(node, @1.first_line);
                setAsKeyword(node, false);
            }
            // Scope for the parameter and the body; the sblock reuses it.
            cur = CreateScope(cur, 0, 0);
        }
        L_PAREN ID
        {
            printdebug("Currently see a function definition taking only one parameter (no as) of name %s and argument name %s", $1,$4);
            // The parameter's type comes from the function's declared function type.
            CreateEntry(cur, getParameter(table_lookup(getAncestor(cur), getType(table_lookup(getAncestor(cur), $<words>1)))), $<words>4, NULL);
        }
        R_PAREN ASSIGN sblock
    // Function body using "as": the record parameter is unpacked into an idlist.
    |   ID
        {
            TableNode *node = table_lookup(getAncestor(cur), $<words>1);
            if (node == undefined) {
                printdebug("null check");
                printdebug("function not declared at line %d, column %d", @1.first_line, @1.first_column);
            } else if (getAdInfoType(node) != TYPE_FUNCTION_DECLARATION) {
                printdebug("function not declared at line %d, column %d", @1.first_line, @1.first_column);
            } else {
                setStartLine(node, @1.first_line);
                setAsKeyword(node, true);
            }
            cur = CreateScope(cur, 0, 0);
        }
        AS L_PAREN
        {
            TableNode *parameter = getParameter(table_lookup(getAncestor(cur), getType(table_lookup(getAncestor(cur), $<words>1))));
            printdebug("%s", getType(parameter));
            if (parameter == undefined) {
                printdebug("function defined with as, but parameter is undefined at line %d, column %d", @1.first_line, @1.first_column);
            } else if (getAdInfoType(parameter) != TYPE_RECORD) {
                printdebug("record: %s., primitive: %s.", getType(parameter), getName(recprime));
                printdebug("function defined with as, but parameter is type %s at line %d, column %d", getType(parameter),@1.first_line, @1.first_column);
            } else {
                // One unnamed entry per record field; idlist fills in the names.
                for (TableNode *entry = getFirstEntry(getRecList(parameter)); entry != NULL; entry = getNextEntry(entry)) {
                    CreateEntry(cur, entry->theType, NULL, NULL);
                }
            }
        }
        idlist
        {
            printdebug("Currently see a function definition taking one paramter (with as) of name %s and number of arguments %d", $1,$6);
        }
        R_PAREN ASSIGN sblock
    ;
// Declares a function name with its function type. A plain declaration
// carries function-declaration info; an external one is entered with none.
function_declaration:
        FUNCTION ID COLON ID
        {
            TableNode *function_type = look_up(cur, $4);
            CreateEntry(cur, function_type, $2, CreateFunctionDeclarationInfo(-1, false));
        }
    |   EXTERNAL FUNCTION ID COLON ID
        {
            TableNode *function_type = look_up(cur, $5);
            CreateEntry(cur, function_type, $3, NULL);
        }
    ;
// Comma-separated parameter names of an "as" function definition. Each name
// is attached to the first entry of the current scope that is still called
// "undefined". The semantic value is the number of names seen.
idlist:
        ID
        {
            // Find the first entry that has not been named yet; stop at the end
            // of the list instead of handing NULL to getName.
            TableNode *entry = getFirstEntry(cur);
            while (entry != NULL && strcmp(getName(entry), "undefined") != 0) {
                entry = getNextEntry(entry);
            }
            // More names follow this one, so a free slot after it is required.
            if (entry == NULL || getNextEntry(entry) == NULL) {
                printdebug("too many parameters at line %d column %d", @1.first_line, @1.first_column);
            }
            if (entry != NULL) {
                addName(entry, $<words>1);
            }
        }
        COMMA idlist
        {
            // The mid-rule action above is component 2, so the nested list is component 4.
            $$ = $<integ>4 + 1;
        }
    |   ID
        {
            TableNode *entry = getFirstEntry(cur);
            while (entry != NULL && strcmp(getName(entry), "undefined") != 0) {
                entry = getNextEntry(entry);
            }
            if (entry == NULL) {
                // Every slot is already named: this name has nowhere to go.
                printdebug("too many parameters at line %d column %d", @1.first_line, @1.first_column);
            } else {
                // This is the last name; any slot left after it stays unnamed.
                if (getNextEntry(entry) != NULL) {
                    printdebug("too few parameters at line %d column %d", @1.first_line, @1.first_column);
                }
                addName(entry, $<words>1);
            }
            $$ = 1;
        }
    ;
// Statement block in braces, optionally starting with a dblock of local
// declarations. A scope whose line is still 0 is reused and stamped with the
// position of the opening brace; otherwise a nested scope is opened.
sblock:
        L_BRACE
        {
            if (getLine(cur) != 0) {
                cur = CreateScope(cur, @1.first_line, @1.first_column);
            } else {
                setLineNumber(cur, @1.first_line);
                // Was first_line: the column must come from first_column.
                setColumnNumber(cur, @1.first_column);
            }
        }
        statement_list
        {
            cur = getParent(cur);
        }
        R_BRACE
    |   L_BRACE
        {
            // No else branch here: when the line is still 0 the dblock that
            // follows stamps the scope itself.
            // NOTE(review): when a scope is opened here, the dblock rule appears
            // to open a second one while only one getParent follows - confirm
            // that nested blocks with declarations leave the intended scope.
            if (getLine(cur) != 0 && getColumn(cur)) {
                cur = CreateScope(cur, @1.first_line, @1.first_column);
            }
        }
        dblock
        {
            printdebug("seen sblock with dblock");
        }
        statement_list
        {
            cur = getParent(cur);
        }
        R_BRACE
    ;
// Declaration block in brackets. A scope whose line is still 0 is reused and
// stamped with the position of the opening bracket; otherwise a new scope is
// opened. This rule does not leave the scope itself.
dblock:
        L_BRACKET
        {
            if (getLine(cur) == 0) {
                setLineNumber(cur, @1.first_line);
                // Was first_line: the column must come from first_column.
                setColumnNumber(cur, @1.first_column);
            } else {
                cur = CreateScope(cur, @1.first_line, @1.first_column);
            }
        }
        declaration_list R_BRACKET
    ;
// One or more declarations separated by semicolons (no trailing semicolon).
declaration_list:
        declaration SEMI_COLON declaration_list
    |   declaration
    ;
// A single "type : name" declaration; enters the name into the current scope
// with the type found in the outermost ancestor scope.
declaration:
        id_or_types COLON ID
        {
            printdebug("ID/TYPE: %s, ID: %s", $<words>1, $<words>3) ;
            TableNode *declared_type = table_lookup(getAncestor(cur), $<words>1);
            CreateEntry(cur, declared_type, $<words>3, NULL);
        }
    ;
// A type reference: either a user-defined name or a primitive type keyword.
// The semantic value is the string carried by the matched component.
id_or_types:
        ID
        {
            $$ = $1;
            printdebug("string of id is %s in ID pattern of id_or_type rule.", $1);
        }
    |   types
        {
            $$ = $1;
            printdebug("string of type is %s in types pattern of id_or_type rule.",$1);
        }
    ;
// One or more statements. Simple statements end with a semicolon; compound
// statements do not.
statement_list:
        compound_statement statement_list
    |   compound_statement
    |   simple_statement SEMI_COLON statement_list
    |   simple_statement SEMI_COLON
    ;
// Statements that contain blocks: while loops, if/then/else (the else branch
// is mandatory), and bare nested blocks.
compound_statement:
        WHILE L_PAREN expression R_PAREN sblock
    |   IF L_PAREN expression R_PAREN THEN sblock ELSE sblock
    |   sblock
    ;
// Assignment or return. Both sides of an assignment carry type-name strings;
// the assignment passes when they are equal, or when the right side is
// "address" and the left side's type is an array, record, or function type.
simple_statement:
        assignable ASSIGN expression
        {
            int accepted = (strcmp($1, $3) == 0);
            if (accepted) {
                printdebug("Passed standard type check; assignable = expression");
            }
            if (!accepted && (strcmp(getType(look_up(cur, $1)), "array") == 0) && (strcmp($3, "address") == 0)) {
                printdebug("%s[☺] Passed array type check; %s = %s", COLOR_GREEN, $1, $3);
                accepted = 1;
            }
            if (!accepted && (strcmp(getType(look_up(cur, $1)), "record") == 0) && (strcmp($3, "address") == 0)) {
                printdebug("%s[☺] Passed address type check; %s = %s", COLOR_GREEN, $1, $3);
                accepted = 1;
            }
            if (!accepted && (strcmp(getType(look_up(cur, $1)), "function type primitive") == 0) && (strcmp($3, "address") == 0)) {
                printdebug("%s[☺] Passed function type primitive type check; %s = %s", COLOR_GREEN, $1, $3);
                accepted = 1;
            }
            if (!accepted) {
                printdebug("%s[TYPE ERROR] %sMismatch at %sline %d and column %d%s", COLOR_ORANGE, COLOR_WHITE, COLOR_YELLOW, @2.first_line, @2.first_column, COLOR_WHITE);
                printdebug(" - Invalid types %s$1: %s and $3: %s%s", COLOR_YELLOW, $1, $3, COLOR_WHITE);
                printdebug(" - %sgetType for address: %s", COLOR_YELLOW, getType(look_up(cur, $1)));
            }
        }
    |   RETURN expression
    ;
// Record field access operator.
rec_op:
        DOT
    ;
// Parenthesised argument list; forwards the argument count as its integer value.
ablock:
        L_PAREN argument_list R_PAREN
        {
            int argument_count = $<integ>2;
            $<integ>$ = argument_count;
            printdebug("ablock is %d", $<integ>$);
        }
    ;
// Comma-separated argument expressions. Each argument adds an entry with an
// empty name and the expression's type to the current scope; the integer
// value is the number of arguments. Because the rule is right-recursive, the
// action for the last argument runs first.
argument_list:
        expression COMMA argument_list
        {
            TableNode *argument_type = look_up(cur, $1);
            CreateEntry(cur, argument_type, "", NULL);
            $<integ>$ = $<integ>3 + 1;
            printdebug("[ARGUMENT_LIST] argument list is %d", $<integ>$);
        }
    |   expression
        {
            TableNode *argument_type = look_up(cur, $1);
            CreateEntry(cur, argument_type, "", NULL);
            $<integ>$ = 1;
            printdebug("[ARGUMENT_LIST] argument list is %d", $<integ>$);
        }
    ;
// The semantic value of an expression is always a type-name string.
// Arithmetic and LESS_THAN need two "integer" operands, AND / OR need two
// "Boolean" operands, EQUAL_TO needs operands of the same type; any mismatch
// is reported and yields the type "undefined".
expression:
        constant
        {
            printdebug("constant expression");
            $$ = $<words>1;
        }
    |   SUB_OR_NEG expression %prec UMINUS
        {
            printdebug("negative expression");
            if (strcmp($2, "integer") == 0) {
                $$ = $2;
            } else {
                printdebug("cant negate something not an integer at line %d and column %d",@2.first_line,@2.first_column);
                $$ = strdup("undefined");
            }
        }
    |   NOT expression
        {
            printdebug("not expression");
            if (strcmp($2, "Boolean") != 0) {
                $$ = strdup("undefined");
                printdebug("mismatch at line %d and column %d. Invalid type being negated is %s", @1.first_line,@1.first_column,$2);
            } else {
                $$ = $2;
            }
        }
    |   expression ADD expression
        {
            printdebug("add expression");
            if (strcmp($1, $3) != 0 || strcmp($1, "integer") != 0) {
                printdebug("mismatch at line %d and column %d. Invalid types %s and %s.", @2.first_line,@2.first_column,$1,$3);
                $$ = strdup("undefined");
            } else {
                $$ = strdup("integer");
            }
        }
    |   expression SUB_OR_NEG expression
        {
            printdebug("sub or neg expression");
            if (strcmp($1, $3) != 0 || strcmp($1, "integer") != 0) {
                printdebug("mismatch at line %d and column %d. Invalid types %s and %s.", @2.first_line,@2.first_column,$1,$3);
                $$ = strdup("undefined");
            } else {
                $$ = strdup("integer");
            }
        }
    |   expression MUL expression
        {
            printdebug("multiply expression");
            if (strcmp($1, $3) != 0 || strcmp($1, "integer") != 0) {
                printdebug("mismatch at line %d and column %d. Invalid types %s and %s.", @2.first_line,@2.first_column,$1,$3);
                $$ = strdup("undefined");
            } else {
                $$ = strdup("integer");
            }
        }
    |   expression DIV expression
        {
            printdebug("divide expression");
            if (strcmp($1, $3) != 0 || strcmp($1, "integer") != 0) {
                printdebug("mismatch at line %d and column %d. Invalid types %s and %s.", @2.first_line,@2.first_column,$1,$3);
                $$ = strdup("undefined");
            } else {
                $$ = strdup("integer");
            }
        }
    |   expression REM expression
        {
            printdebug("remainder expression");
            if (strcmp($1, $3) != 0 || strcmp($1, "integer") != 0) {
                printdebug("mismatch at line %d and column %d. Invalid types %s and %s.", @2.first_line,@2.first_column,$1,$3);
                $$ = strdup("undefined");
            } else {
                $$ = strdup("integer");
            }
        }
    |   expression AND expression
        {
            printdebug("AND expression");
            if (strcmp($1, $3) != 0 || strcmp($1, "Boolean") != 0) {
                printdebug("mismatch at line %d and column %d. Invalid types %s and %s.", @2.first_line,@2.first_column,$1,$3);
                $$ = strdup("undefined");
            } else {
                $$ = strdup("Boolean");
            }
        }
    |   expression OR expression
        {
            printdebug("OR");
            if (strcmp($1, $3) != 0 || strcmp($1, "Boolean") != 0) {
                printdebug("mismatch at line %d and column %d. Invalid types %s and %s.", @2.first_line,@2.first_column,$1,$3);
                $$ = strdup("undefined");
            } else {
                $$ = strdup("Boolean");
            }
        }
    |   expression LESS_THAN expression
        {
            printdebug("less than expression");
            if (strcmp($1, $3) != 0 || strcmp($1, "integer") != 0) {
                printdebug("mismatch at line %d and column %d. Invalid types %s and %s.", @2.first_line,@2.first_column,$1,$3);
                $$ = strdup("undefined");
            } else {
                $$ = strdup("Boolean");
            }
        }
    |   expression EQUAL_TO expression
        {
            printdebug("equals check expression");
            if (strcmp($1, $3) != 0) {
                printdebug("mismatch at line %d and column %d. Invalid types %s and %s", @2.first_line,@2.first_column,$1,$3);
                $$ = strdup("undefined");
            } else {
                $$ = strdup("Boolean");
            }
        }
    |   assignable
        {
            printdebug("assignable expression. current type is %s",$1);
            $$ = $1;
        }
    |   L_PAREN expression R_PAREN
        {
            printdebug("paren expression. current type is %s",$2);
            $$ = $2;
        }
    |   memOp assignable
        {
            $$ = strdup("address");
        }
    ;
// An assignable is a plain identifier, a call or index applied to another
// assignable, or a record field access. Its semantic value is a type-name
// string.
// TODO (from the original authors): revisit this rule, and type-check the
// ablock contents for array indexing.
assignable:
        ID
        {
            $$ = getType(look_up(cur, $1));
            printdebug("[ASSIGNABLE - RULE 1] assignable = type: %s | ID = %s", $$, $1);
        }
    |   assignable
        {
            printdebug("%sBeginning rule 2 of assignable.", COLOR_CYAN);
            // Temporary scope that collects one entry per argument of the ablock.
            cur = CreateScope(cur, -1, -1);
        }
        ablock
        {
            // The mid-rule action above is component 2, so the ablock is
            // component 3 of this rule.
            int type = getAdInfoType(look_up(getParent(cur), $1));
            printdebug("%stype is %d", COLOR_PURPLE, type);
            if (type == TYPE_FUNCTION_DECLARATION) {
                printdebug("%sEntering function call", COLOR_LIGHTGREEN);
                if (getAsKeyword(look_up(getParent(cur), $1))) {
                    TableNode *param = getParameter(look_up(getParent(cur), $1));
                    SymbolTable *recList = getRecList(param);
                    TableNode *firstRef = getFirstEntry(recList);
                    TableNode *lastCheckedRef = firstRef;
                    TableNode *lastCheckedAct = getFirstEntry(cur);
                    // argument_list is right-recursive, so the last argument is
                    // entered first: walk the record fields backwards, starting
                    // from the last one.
                    while (lastCheckedRef != NULL && getNextEntry(lastCheckedRef) != NULL) {
                        lastCheckedRef = getNextEntry(lastCheckedRef);
                    }
                    //this isn't very efficient, but will hopefully work
                    while (lastCheckedAct != NULL && lastCheckedRef != NULL) {
                        // Compare types, as the non-"as" branch does; argument
                        // entries are created with an empty name, so comparing
                        // names could never match.
                        if (strcmp(getType(lastCheckedAct), getType(lastCheckedRef)) != 0) {
                            printdebug("expected %s expression in function call but got %s at line %d and column %d",getType(lastCheckedRef), getType(lastCheckedAct), @3.first_line, @3.first_column);
                        }
                        lastCheckedAct = getNextEntry(lastCheckedAct);
                        if (lastCheckedRef == firstRef) {
                            // The head has no predecessor: stop here instead of
                            // searching past the end of the list.
                            lastCheckedRef = NULL;
                        } else {
                            TableNode *previousRef = firstRef;
                            while (getNextEntry(previousRef) != lastCheckedRef) {
                                previousRef = getNextEntry(previousRef);
                            }
                            lastCheckedRef = previousRef;
                        }
                    }
                } else {
                    char *expected = getName(getParameter(look_up(getParent(cur), $1)));
                    char *actual = getType(getFirstEntry(cur));
                    if (strcmp(expected, actual) != 0) {
                        printdebug("expected %s expression in function call but got %s at line %d and column %d",expected, actual, @3.first_line, @3.first_column);
                    }
                }
                $$ = getName(getReturn(table_lookup(getAncestor(cur), $1)));
                printdebug("[ASSIGNABLE - RULE 2] assignable = type: %s | name_func = %s", $$, $1);
            } else if (type == TYPE_ARRAY_TYPE) {
                printdebug("%sEntering array call", COLOR_LIGHTGREEN);
                // The argument count is the value of the ablock (component 3),
                // not of the mid-rule action (component 2), which sets no value.
                int expected_dims = getNumArrDim(look_up(getParent(cur), $1));
                if (expected_dims != $<integ>3) {
                    printdebug("expected %d arguments but had %d at line %d and column %d\n", expected_dims, $<integ>3, @3.first_line, @3.first_column);
                }
                $$ = getName(getArrType(look_up(getParent(cur), $1)));
                printdebug("[ASSIGNABLE - RULE 2] assignable = type: %s | name_func = %s", $$, $1);
            }
            // NOTE(review): when the type is neither a function declaration nor
            // an array, no value is assigned here - confirm the intended result.
            cur = getParent(cur);
        }
    |   assignable rec_op ID
        {
            // NOTE(review): when the field is not found no value is assigned
            // here - confirm whether this should yield "undefined".
            if (undefined != table_lookup(getRecList(table_lookup(getAncestor(cur), $1)), $3)) {
                $$ = getName(table_lookup(getRecList(table_lookup(getAncestor(cur), $1)), $3));
            }
            printdebug("[ASSIGNABLE - RULE 3] assignable = type: %s | ID = %s", $$, $1);
        }
    ;
// Memory operators applied to an assignable; each only logs what was seen.
memOp:
        RESERVE
        {
            printdebug("reserve expression");
        }
    |   RELEASE
        {
            printdebug("release expression");
        }
    ;
// Literal constants. An integer constant yields the fixed string "integer";
// every other constant forwards the string attached to its token.
constant:
        C_STRING
        {
            printdebug("string of C_STRING in constant is %s",$<words>1);
            $$ = $<words>1;
        }
    |   C_INTEGER
        {
            printdebug("string of C_INTEGER in constant is integer");
            $$ = "integer";
        }
    |   C_NULL
        {
            printdebug("string of C_NULL in constant is %s",$<words>1);
            $$ = $<words>1;
        }
    |   C_CHARACTER
        {
            printdebug("string of C_CHARACTER in constant is %s",$<words>1);
            $$ = $<words>1;
        }
    |   C_TRUE
        {
            printdebug("string of C_TRUE in constant is %s",$<words>1);
            $$ = $<words>1;
        }
    |   C_FALSE
        {
            printdebug("string of C_FALSE in constant is %s",$<words>1);
            $$ = $<words>1;
        }
    ;
// Primitive type keywords; the value is the string attached to the token.
types:
        T_INTEGER
        {
            printdebug("string of T_INTEGER in types is %s",$<words>1);
            $$ = $1;
        }
    |   T_ADDRESS
        {
            printdebug("string of T_ADDRESS in types is %s",$<words>1);
            $$ = $1;
        }
    |   T_CHARACTER
        {
            printdebug("string of T_CHARACTER in types is %s",$<words>1);
            $$ = $1;
        }
    |   T_BOOLEAN
        {
            printdebug("string of T_BOOLEAN in types is %s",$<words>1);
            $$ = $1;
        }
    ;
%%
void yyerror(const char *err) {
fprintf(stderr, "ERROR: %s at token %s at line number %d,column number %d", err,yytext,yylloc.first_line,yylloc.first_column);
}