Open tncks opened 4 days ago
test 1
/* A program to perform Euclid's
Algorithm to computer gcd */
int gcd (int u, int v)
{
if (v == 0) return u;
else return gcd(v,u-u/v*v);
/* u-u/v*v == u mod v */
}
void main(void)
{
int x; int y;
x = input(); y = input();
output(gcd(x,y));
}
result 1
C-MINUS COMPILATION: ./test.1.txt
4: reserved word: int
4: ID, name= gcd
4: (
4: reserved word: int
4: ID, name= u
4: ,
4: reserved word: int
4: ID, name= v
4: )
5: {
6: reserved word: if
6: (
6: ID, name= v
6: ==
6: NUM, val= 0
6: )
6: reserved word: return
6: ID, name= u
6: ;
7: reserved word: else
7: reserved word: return
7: ID, name= gcd
7: (
7: ID, name= v
7: ,
7: ID, name= u
7: -
7: ID, name= u
7: /
7: ID, name= v
7: *
7: ID, name= v
7: )
7: ;
9: }
11: reserved word: void
11: ID, name= main
11: (
11: reserved word: void
11: )
12: {
13: reserved word: int
13: ID, name= x
13: ;
13: reserved word: int
13: ID, name= y
13: ;
14: ID, name= x
14: =
14: ID, name= input
14: (
14: )
14: ;
14: ID, name= y
14: =
14: ID, name= input
14: (
14: )
14: ;
15: ID, name= output
15: (
15: ID, name= gcd
15: (
15: ID, name= x
15: ,
15: ID, name= y
15: )
15: )
15: ;
16: }
17: EOF
Makefile (professor release version)
# Makefile for C-Minus Scanner
# ./lex/tiny.l --> ./cminus.l
CC = gcc
CFLAGS = -W -Wall
OBJS = main.o util.o scan.o
OBJS_LEX = main.o util.o lex.yy.o
.PHONY: all clean
all: cminus_cimpl cminus_lex
clean:
-rm -vf cminus_cimpl cminus_lex *.o lex.yy.c
cminus_cimpl: $(OBJS)
$(CC) $(CFLAGS) -o $@ $(OBJS)
cminus_lex: $(OBJS_LEX)
$(CC) $(CFLAGS) -o $@ $(OBJS_LEX) -lfl
main.o: main.c globals.h util.h scan.h
$(CC) $(CFLAGS) -c -o $@ $<
scan.o: scan.c globals.h util.h scan.h
$(CC) $(CFLAGS) -c -o $@ $<
util.o: util.c globals.h util.h
$(CC) $(CFLAGS) -c -o $@ $<
lex.yy.o: lex.yy.c globals.h util.h scan.h
$(CC) $(CFLAGS) -c -o $@ $<
lex.yy.c: cminus.l
flex -o $@ $<
Good Reference (focusing on RE, only rules section)
%{
#include "y.tab.h"
int countn=0;
%}
%option yylineno
alpha [a-zA-Z]
digit [0-9]
unary "++"|"--"
%%
"printf" { strcpy(yylval.nd_obj.name,(yytext)); return PRINTFF; }
"scanf" { strcpy(yylval.nd_obj.name,(yytext)); return SCANFF; }
"int" { strcpy(yylval.nd_obj.name,(yytext)); return INT; }
"float" { strcpy(yylval.nd_obj.name,(yytext)); return FLOAT; }
"char" { strcpy(yylval.nd_obj.name,(yytext)); return CHAR; }
"void" { strcpy(yylval.nd_obj.name,(yytext)); return VOID; }
"return" { strcpy(yylval.nd_obj.name,(yytext)); return RETURN; }
"for" { strcpy(yylval.nd_obj.name,(yytext)); return FOR; }
"if" { strcpy(yylval.nd_obj.name,(yytext)); return IF; }
"else" { strcpy(yylval.nd_obj.name,(yytext)); return ELSE; }
^"#include"[ ]*<.+\.h> { strcpy(yylval.nd_obj.name,(yytext)); return INCLUDE; }
"true" { strcpy(yylval.nd_obj.name,(yytext)); return TRUE; }
"false" { strcpy(yylval.nd_obj.name,(yytext)); return FALSE; }
[-]?{digit}+ { strcpy(yylval.nd_obj.name,(yytext)); return NUMBER; }
[-]?{digit}+\.{digit}{1,6} { strcpy(yylval.nd_obj.name,(yytext)); return FLOAT_NUM; }
{alpha}({alpha}|{digit})* { strcpy(yylval.nd_obj.name,(yytext)); return ID; }
{unary} { strcpy(yylval.nd_obj.name,(yytext)); return UNARY; }
"<=" { strcpy(yylval.nd_obj.name,(yytext)); return LE; }
">=" { strcpy(yylval.nd_obj.name,(yytext)); return GE; }
"==" { strcpy(yylval.nd_obj.name,(yytext)); return EQ; }
"!=" { strcpy(yylval.nd_obj.name,(yytext)); return NE; }
">" { strcpy(yylval.nd_obj.name,(yytext)); return GT; }
"<" { strcpy(yylval.nd_obj.name,(yytext)); return LT; }
"&&" { strcpy(yylval.nd_obj.name,(yytext)); return AND; }
"||" { strcpy(yylval.nd_obj.name,(yytext)); return OR; }
"+" { strcpy(yylval.nd_obj.name,(yytext)); return ADD; }
"-" { strcpy(yylval.nd_obj.name,(yytext)); return SUBTRACT; }
"/" { strcpy(yylval.nd_obj.name,(yytext)); return DIVIDE; }
"*" { strcpy(yylval.nd_obj.name,(yytext)); return MULTIPLY; }
\/\/.* { ; }
\/\*(.*\n)*.*\*\/ { ; }
[ \t]* { ; }
[\n] { countn++; }
. { return *yytext; }
["].*["] { strcpy(yylval.nd_obj.name,(yytext)); return STR; }
['].['] { strcpy(yylval.nd_obj.name,(yytext)); return CHARACTER; }
%%
int yywrap() {
return 1;
}
pseudo-code algorithm reference simple version (DFA state machine)
TOKEN getRelop() // TOKEN has two components
TOKEN retToken = new(RELOP); // First component set here
while (true)
switch(state)
case 0: c = nextChar();
if (c == '<') state = 1;
else if (c == '=') state = 5;
else if (c == '>') state = 6;
else fail();
break;
case 1: ...
...
case 8: retract(); // an accepting state with a star
retToken.attribute = GT; // second component
return(retToken);
Pure pseudo-code algorithm for DFA Accept
s = s0; // start state.
c = nextChar(); // a priming read
while (c != eof) {
s = move(s,c);
c = nextChar();
}
if (s is in F, the set of accepting states) return yes
else return no
Print Formatting Core Logic here:
/* states in scanner DFA */
typedef enum
{ START,INASSIGN,INCOMMENT,INNUM,INID,DONE }
StateType;
/* lexeme of identifier or reserved word */
char tokenString[MAXTOKENLEN+1];
/* BUFLEN = length of the input buffer for
source code lines */
#define BUFLEN 256
static char lineBuf[BUFLEN]; /* holds the current line */
static int linepos = 0; /* current position in LineBuf */
static int bufsize = 0; /* current size of buffer string */
static int EOF_flag = FALSE; /* corrects ungetNextChar behavior on EOF */
/* getNextChar fetches the next non-blank character
from lineBuf, reading in a new line if lineBuf is
exhausted */
static int getNextChar(void)
{ if (!(linepos < bufsize))
{ lineno++;
if (fgets(lineBuf,BUFLEN-1,source))
{ if (EchoSource) fprintf(listing,"%4d: %s",lineno,lineBuf);
bufsize = strlen(lineBuf);
linepos = 0;
return lineBuf[linepos++];
}
else
{ EOF_flag = TRUE;
return EOF;
}
}
else return lineBuf[linepos++];
}
/* ungetNextChar backtracks one character
in lineBuf */
static void ungetNextChar(void)
{ if (!EOF_flag) linepos-- ;}
/* lookup table of reserved words */
static struct
{ char* str;
TokenType tok;
} reservedWords[MAXRESERVED]
= {{"if",IF},{"then",THEN},{"else",ELSE},{"end",END},
{"repeat",REPEAT},{"until",UNTIL},{"read",READ},
{"write",WRITE}};
/* lookup an identifier to see if it is a reserved word */
/* uses linear search */
static TokenType reservedLookup (char * s)
{ int i;
for (i=0;i<MAXRESERVED;i++)
if (!strcmp(s,reservedWords[i].str))
return reservedWords[i].tok;
return ID;
}
/****************************************/
/* the primary function of the scanner */
/****************************************/
/* function getToken returns the
* next token in source file
*/
TokenType getToken(void)
{ /* index for storing into tokenString */
int tokenStringIndex = 0;
/* holds current token to be returned */
TokenType currentToken;
/* current state - always begins at START */
StateType state = START;
/* flag to indicate save to tokenString */
int save;
while (state != DONE)
{ int c = getNextChar();
save = TRUE;
switch (state)
{ case START:
if (isdigit(c))
state = INNUM;
else if (isalpha(c))
state = INID;
else if (c == ':')
state = INASSIGN;
else if ((c == ' ') || (c == '\t') || (c == '\n'))
save = FALSE;
else if (c == '{')
{ save = FALSE;
state = INCOMMENT;
}
else
{ state = DONE;
switch (c)
{ case EOF:
save = FALSE;
currentToken = ENDFILE;
break;
case '=':
currentToken = EQ;
break;
case '<':
currentToken = LT;
break;
case '+':
currentToken = PLUS;
break;
case '-':
currentToken = MINUS;
break;
case '*':
currentToken = TIMES;
break;
case '/':
currentToken = OVER;
break;
case '(':
currentToken = LPAREN;
break;
case ')':
currentToken = RPAREN;
break;
case ';':
currentToken = SEMI;
break;
default:
currentToken = ERROR;
break;
}
}
break;
case INCOMMENT:
save = FALSE;
if (c == EOF)
{ state = DONE;
currentToken = ENDFILE;
}
else if (c == '}') state = START;
break;
case INASSIGN:
state = DONE;
if (c == '=')
currentToken = ASSIGN;
else
{ /* backup in the input */
ungetNextChar();
save = FALSE;
currentToken = ERROR;
}
break;
case INNUM:
if (!isdigit(c))
{ /* backup in the input */
ungetNextChar();
save = FALSE;
state = DONE;
currentToken = NUM;
}
break;
case INID:
if (!isalpha(c))
{ /* backup in the input */
ungetNextChar();
save = FALSE;
state = DONE;
currentToken = ID;
}
break;
case DONE:
default: /* should never happen */
fprintf(listing,"Scanner Bug: state= %d\n",state);
state = DONE;
currentToken = ERROR;
break;
}
if ((save) && (tokenStringIndex <= MAXTOKENLEN))
tokenString[tokenStringIndex++] = (char) c;
if (state == DONE)
{ tokenString[tokenStringIndex] = '\0';
if (currentToken == ID)
currentToken = reservedLookup(tokenString);
}
}
if (TraceScan) {
fprintf(listing,"\t%d: ",lineno);
printToken(currentToken,tokenString);
}
return currentToken;
}
Note: And, below seperate code; related one also (sub-logic for expansion implementation for it):
void printToken( TokenType token, const char* tokenString )
{ switch (token)
{ case IF:
case THEN:
case ELSE:
case END:
case REPEAT:
case UNTIL:
case READ:
case WRITE:
fprintf(listing,
"reserved word: %s\n",tokenString);
break;
case ASSIGN: fprintf(listing,":=\n"); break;
case LT: fprintf(listing,"<\n"); break;
case EQ: fprintf(listing,"=\n"); break;
case LPAREN: fprintf(listing,"(\n"); break;
case RPAREN: fprintf(listing,")\n"); break;
case SEMI: fprintf(listing,";\n"); break;
case PLUS: fprintf(listing,"+\n"); break;
case MINUS: fprintf(listing,"-\n"); break;
case TIMES: fprintf(listing,"*\n"); break;
case OVER: fprintf(listing,"/\n"); break;
case ENDFILE: fprintf(listing,"EOF\n"); break;
case NUM:
fprintf(listing,
"NUM, val= %s\n",tokenString);
break;
case ID:
fprintf(listing,
"ID, name= %s\n",tokenString);
break;
case ERROR:
fprintf(listing,
"ERROR: %s\n",tokenString);
break;
default: /* should never happen */
fprintf(listing,"Unknown token: %d\n",token);
}
}
Tiny C Compiler.pdf