Code_cpp_0922_reference1

tncks commented 4 days ago

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>

#define NO_KEYWORDS 6
#define ID_LENGTH 12

enum tsymbol{
    tnull = -1, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
    s20, s21, snumber, sident, selse, sif, sint, s
};

struct tokenType {
    int number; // 입력된 숫자를 저장
    char sym[2]; // 특수 심볼 지정
    union{
        char id[ID_LENGTH];
        int num;
    } value; // 키워드 지정
} a;

char *keyword[NO_KEYWORDS] = {
    "else", "if", "int", "return", "void", "while"
};

enum tsymbol tnum[NO_KEYWORDS] = {
    selse, sif, sint, sreturn, svoid, swhile
};

int superLetter(char ch) {
    if(isalpha(ch) || ch == '_') return 1;
    else return 0; 
}

int superLetterOrDigit(char ch){
    if(isalnum(ch) || ch == '_') return 1;
    else return 0; 
}

int getIntNum(char firstCharacter) {
    int num = 0;
    char ch;
    if(firstCharacter != '0'){
        ch = firstCharacter;
        do{
            num = 10 * num + (int)(ch - '0');
            ch = getchar();
        } while(isdigit(ch));
    } 
    else { 
        num = 0; 
    }
    ungetc(ch, stdin);
    return num; 
}

struct tokenType scanner() {
    struct tokenType token;
    int i=0, index=0;
    char ch, id[ID_LENGTH];
    token.number = tnull;
    do {
        while (isspace(ch = getchar()));

        if(superLetter(ch)){ // 대문자, 소문자일 경우
            i = 0;
            do{
                if(i < ID_LENGTH) {
                    id[i++] = ch; 
                }
                ch = getchar();
            } while(superLetterOrDigit(ch));
            id[i] = '\0';
            ungetc(ch, stdin); 
            index = 0;
            while(index < NO_KEYWORDS) {
                if(!strcmp(id, keyword[index])) break; 
                if(index < NO_KEYWORDS) {
                    token.number = tnum[index];
                    strcpy(token.value.id, id); 
                }
                else {
                    token.number = sident;
                    strcpy(token.value.id, id); 
                }
                index++;
            }
        }
        else if(isdigit(ch)){ 
            token.number = snumber;
            token.value.num = getIntNum(ch);
        }
        else switch(ch){
            case '*' :
                ch = getchar();
                if(ch == '/') {
                    token.number = s21;
                    strcpy(token.sym, "*/"); 
                }
                else {
                    token.number = s3;
                    strcpy(token.sym, "* ");
                    ungetc(ch, stdin); 
                } break;
            case '/' :
                ch = getchar();
                if(ch == '*') {
                    strcpy(token.sym, "/*");
                    token.number = s20; 
                }
                else {
                    strcpy(token.sym, "/ ");
                    token.number = s4;
                    ungetc(ch, stdin); 
                } break;
            case '<' :
                ch = getchar();
                if(ch == '=') {
                    strcpy(token.sym, "<=");
                    token.number = s6; 
                }
                else {
                    strcpy(token.sym, "< ");
                    token.number = s5;
                    ungetc(ch, stdin); 
                } break;
            case '>' :
                ch = getchar();
                if(ch == '=') {
                    strcpy(token.sym, ">=");
                    token.number = s8; 
                }
                else {
                    strcpy(token.sym, "> ");
                    token.number = s7;
                    ungetc(ch, stdin); 
                } break;
            case '=' :
                ch = getchar();
                if(ch == '=') {
                    strcpy(token.sym, "==");
                    token.number = s9; 
                }
                else {
                    strcpy(token.sym, "= ");
                    token.number = s11;
                    ungetc(ch, stdin); 
                } break;
            case '!' :
                ch = getchar();
                if(ch == '=') {
                    strcpy(token.sym, "!=");
                    token.number = s12; 
                }
                else printf("Error\r\n"); break;
            case '+' : token.number = s1; strcpy(token.sym, "+"); break;
            case '-' : token.number = s2; strcpy(token.sym, "-"); break;
            case ';' : token.number = s12; strcpy(token.sym, ";"); break;
            case ',' : token.number = s13; strcpy(token.sym, ","); break;
            case '(' : token.number = s14; strcpy(token.sym, "("); break;
            case ')' : token.number = s15; strcpy(token.sym, ")"); break;
            case '[' : token.number = s16; strcpy(token.sym, "["); break;
            case ']' : token.number = s17; strcpy(token.sym, "]"); break;
            case '{' : token.number = s18; strcpy(token.sym, "{"); break;
            case '}' : token.number = s19; strcpy(token.sym, "}"); break;
            case EOF : token.number = s21; strcpy(token.sym, "EOF"); break;
            default: {
                printf("Current character : %c", ch);
                printf("Error\r\n");
                break; 
            }
        }
    } while(token.number == tnull);
    return token; 
}

void main() {
    printf("Input String : \n");
    while(1) {
        a = scanner();
        if(a.number < 21) printf("Symbol : %s\n", a.sym);
        else if(a.number > 22 && a.number < 29) printf("Keyword : %s\n", a.value.id);
        else if (a.number == 21) printf("Number : %d \n", a.value.num);
        else if (a.number == 22) printf("Variable : %s\n", a.value.id);
        else { }
    }
}

Tiny C Compiler.pdf

tncks commented 4 days ago

test 1

/* A program to perform Euclid's
   Algorithm to computer gcd */

int gcd (int u, int v)
{
    if (v == 0) return u;
    else return gcd(v,u-u/v*v);
    /* u-u/v*v == u mod v */
}

void main(void)
{
    int x; int y;
    x = input(); y = input();
    output(gcd(x,y));
}

tncks commented 4 days ago

result 1


C-MINUS COMPILATION: ./test.1.txt
    4: reserved word: int
    4: ID, name= gcd
    4: (
    4: reserved word: int
    4: ID, name= u
    4: ,
    4: reserved word: int
    4: ID, name= v
    4: )
    5: {
    6: reserved word: if
    6: (
    6: ID, name= v
    6: ==
    6: NUM, val= 0
    6: )
    6: reserved word: return
    6: ID, name= u
    6: ;
    7: reserved word: else
    7: reserved word: return
    7: ID, name= gcd
    7: (
    7: ID, name= v
    7: ,
    7: ID, name= u
    7: -
    7: ID, name= u
    7: /
    7: ID, name= v
    7: *
    7: ID, name= v
    7: )
    7: ;
    9: }
    11: reserved word: void
    11: ID, name= main
    11: (
    11: reserved word: void
    11: )
    12: {
    13: reserved word: int
    13: ID, name= x
    13: ;
    13: reserved word: int
    13: ID, name= y
    13: ;
    14: ID, name= x
    14: =
    14: ID, name= input
    14: (
    14: )
    14: ;
    14: ID, name= y
    14: =
    14: ID, name= input
    14: (
    14: )
    14: ;
    15: ID, name= output
    15: (
    15: ID, name= gcd
    15: (
    15: ID, name= x
    15: ,
    15: ID, name= y
    15: )
    15: )
    15: ;
    16: }
    17: EOF

tncks commented 4 days ago

Makefile (professor release version)

# Makefile for C-Minus Scanner
# ./lex/tiny.l --> ./cminus.l

CC = gcc 

CFLAGS = -W -Wall

OBJS = main.o util.o scan.o 
OBJS_LEX = main.o util.o lex.yy.o

.PHONY: all clean
all: cminus_cimpl cminus_lex

clean:
    -rm -vf cminus_cimpl cminus_lex *.o lex.yy.c

cminus_cimpl: $(OBJS)
    $(CC) $(CFLAGS) -o $@ $(OBJS) 

cminus_lex: $(OBJS_LEX)
    $(CC) $(CFLAGS) -o $@ $(OBJS_LEX) -lfl

main.o: main.c globals.h util.h scan.h
    $(CC) $(CFLAGS) -c -o $@ $<

scan.o: scan.c globals.h util.h scan.h
    $(CC) $(CFLAGS) -c -o $@ $<

util.o: util.c globals.h util.h
    $(CC) $(CFLAGS) -c -o $@ $<

lex.yy.o: lex.yy.c globals.h util.h scan.h
    $(CC) $(CFLAGS) -c -o $@ $<

lex.yy.c: cminus.l
    flex -o $@ $<

tncks commented 3 days ago

Good Reference (focusing on RE, only rules section)

%{
    #include "y.tab.h"
    int countn=0;
%}
%option yylineno

alpha [a-zA-Z]
digit [0-9]
unary "++"|"--"

%%

"printf"                    { strcpy(yylval.nd_obj.name,(yytext)); return PRINTFF; }
"scanf"                     { strcpy(yylval.nd_obj.name,(yytext)); return SCANFF; }
"int"                       { strcpy(yylval.nd_obj.name,(yytext)); return INT; }
"float"                     { strcpy(yylval.nd_obj.name,(yytext)); return FLOAT; }
"char"                      { strcpy(yylval.nd_obj.name,(yytext)); return CHAR; }
"void"                      { strcpy(yylval.nd_obj.name,(yytext)); return VOID; }
"return"                    { strcpy(yylval.nd_obj.name,(yytext)); return RETURN; }
"for"                       { strcpy(yylval.nd_obj.name,(yytext)); return FOR; }
"if"                        { strcpy(yylval.nd_obj.name,(yytext)); return IF; }
"else"                      { strcpy(yylval.nd_obj.name,(yytext)); return ELSE; }
^"#include"[ ]*<.+\.h>      { strcpy(yylval.nd_obj.name,(yytext)); return INCLUDE; }
"true"                      { strcpy(yylval.nd_obj.name,(yytext)); return TRUE; }
"false"                     { strcpy(yylval.nd_obj.name,(yytext)); return FALSE; }
[-]?{digit}+                { strcpy(yylval.nd_obj.name,(yytext)); return NUMBER; }
[-]?{digit}+\.{digit}{1,6}  { strcpy(yylval.nd_obj.name,(yytext)); return FLOAT_NUM; }
{alpha}({alpha}|{digit})*   { strcpy(yylval.nd_obj.name,(yytext)); return ID; }
{unary}                     { strcpy(yylval.nd_obj.name,(yytext)); return UNARY; }
"<="                        { strcpy(yylval.nd_obj.name,(yytext)); return LE; }
">="                        { strcpy(yylval.nd_obj.name,(yytext)); return GE; }
"=="                        { strcpy(yylval.nd_obj.name,(yytext)); return EQ; }
"!="                        { strcpy(yylval.nd_obj.name,(yytext)); return NE; }
">"                         { strcpy(yylval.nd_obj.name,(yytext)); return GT; }
"<"                         { strcpy(yylval.nd_obj.name,(yytext)); return LT; }
"&&"                        { strcpy(yylval.nd_obj.name,(yytext)); return AND; }
"||"                        { strcpy(yylval.nd_obj.name,(yytext)); return OR; }
"+"                         { strcpy(yylval.nd_obj.name,(yytext)); return ADD; }
"-"                         { strcpy(yylval.nd_obj.name,(yytext)); return SUBTRACT; }
"/"                         { strcpy(yylval.nd_obj.name,(yytext)); return DIVIDE; }
"*"                         { strcpy(yylval.nd_obj.name,(yytext)); return MULTIPLY; }
\/\/.*                      { ; }
\/\*(.*\n)*.*\*\/           { ; } 
[ \t]*                      { ; }
[\n]                        { countn++; }
.                           { return *yytext; }
["].*["]                    { strcpy(yylval.nd_obj.name,(yytext)); return STR; }
['].[']                     { strcpy(yylval.nd_obj.name,(yytext)); return CHARACTER; }

%%

int yywrap() {
    return 1;
}

tncks commented 3 days ago

pseudo-code algorithm reference simple version (DFA state machine)

  TOKEN getRelop()                        // TOKEN has two components
    TOKEN retToken = new(RELOP);          // First component set here
    while (true)
       switch(state)
         case 0: c = nextChar();
                 if (c == '<')      state = 1;
                 else if (c == '=') state = 5;
                 else if (c == '>') state = 6;
                 else fail();
                 break;
         case 1: ...
         ...
         case 8: retract();  // an accepting state with a star
                 retToken.attribute = GT;  // second component
                 return(retToken);

tncks commented 3 days ago

Pure pseudo-code algorithm for DFA Accept

    s = s0;   // start state.
    c = nextChar();      // a priming read
    while (c != eof) {
      s = move(s,c);
      c = nextChar();
    }
    if (s is in F, the set of accepting states) return yes
    else return no

tncks commented 2 days ago

Print Formatting Core Logic here:

/* states in scanner DFA */
typedef enum
   { START,INASSIGN,INCOMMENT,INNUM,INID,DONE }
   StateType;

/* lexeme of identifier or reserved word */
char tokenString[MAXTOKENLEN+1];

/* BUFLEN = length of the input buffer for
   source code lines */
#define BUFLEN 256

static char lineBuf[BUFLEN]; /* holds the current line */
static int linepos = 0; /* current position in LineBuf */
static int bufsize = 0; /* current size of buffer string */
static int EOF_flag = FALSE; /* corrects ungetNextChar behavior on EOF */

/* getNextChar fetches the next non-blank character
   from lineBuf, reading in a new line if lineBuf is
   exhausted */
static int getNextChar(void)
{ if (!(linepos < bufsize))
  { lineno++;
    if (fgets(lineBuf,BUFLEN-1,source))
    { if (EchoSource) fprintf(listing,"%4d: %s",lineno,lineBuf);
      bufsize = strlen(lineBuf);
      linepos = 0;
      return lineBuf[linepos++];
    }
    else
    { EOF_flag = TRUE;
      return EOF;
    }
  }
  else return lineBuf[linepos++];
}

/* ungetNextChar backtracks one character
   in lineBuf */
static void ungetNextChar(void)
{ if (!EOF_flag) linepos-- ;}

/* lookup table of reserved words */
static struct
    { char* str;
      TokenType tok;
    } reservedWords[MAXRESERVED]
   = {{"if",IF},{"then",THEN},{"else",ELSE},{"end",END},
      {"repeat",REPEAT},{"until",UNTIL},{"read",READ},
      {"write",WRITE}};

/* lookup an identifier to see if it is a reserved word */
/* uses linear search */
static TokenType reservedLookup (char * s)
{ int i;
  for (i=0;i<MAXRESERVED;i++)
    if (!strcmp(s,reservedWords[i].str))
      return reservedWords[i].tok;
  return ID;
}

/****************************************/
/* the primary function of the scanner  */
/****************************************/
/* function getToken returns the 
 * next token in source file
 */
TokenType getToken(void)
{  /* index for storing into tokenString */
   int tokenStringIndex = 0;
   /* holds current token to be returned */
   TokenType currentToken;
   /* current state - always begins at START */
   StateType state = START;
   /* flag to indicate save to tokenString */
   int save;
   while (state != DONE)
   { int c = getNextChar();
     save = TRUE;
     switch (state)
     { case START:
         if (isdigit(c))
           state = INNUM;
         else if (isalpha(c))
           state = INID;
         else if (c == ':')
           state = INASSIGN;
         else if ((c == ' ') || (c == '\t') || (c == '\n'))
           save = FALSE;
         else if (c == '{')
         { save = FALSE;
           state = INCOMMENT;
         }
         else
         { state = DONE;
           switch (c)
           { case EOF:
               save = FALSE;
               currentToken = ENDFILE;
               break;
             case '=':
               currentToken = EQ;
               break;
             case '<':
               currentToken = LT;
               break;
             case '+':
               currentToken = PLUS;
               break;
             case '-':
               currentToken = MINUS;
               break;
             case '*':
               currentToken = TIMES;
               break;
             case '/':
               currentToken = OVER;
               break;
             case '(':
               currentToken = LPAREN;
               break;
             case ')':
               currentToken = RPAREN;
               break;
             case ';':
               currentToken = SEMI;
               break;
             default:
               currentToken = ERROR;
               break;
           }
         }
         break;
       case INCOMMENT:
         save = FALSE;
         if (c == EOF)
         { state = DONE;
           currentToken = ENDFILE;
         }
         else if (c == '}') state = START;
         break;
       case INASSIGN:
         state = DONE;
         if (c == '=')
           currentToken = ASSIGN;
         else
         { /* backup in the input */
           ungetNextChar();
           save = FALSE;
           currentToken = ERROR;
         }
         break;
       case INNUM:
         if (!isdigit(c))
         { /* backup in the input */
           ungetNextChar();
           save = FALSE;
           state = DONE;
           currentToken = NUM;
         }
         break;
       case INID:
         if (!isalpha(c))
         { /* backup in the input */
           ungetNextChar();
           save = FALSE;
           state = DONE;
           currentToken = ID;
         }
         break;
       case DONE:
       default: /* should never happen */
         fprintf(listing,"Scanner Bug: state= %d\n",state);
         state = DONE;
         currentToken = ERROR;
         break;
     }
     if ((save) && (tokenStringIndex <= MAXTOKENLEN))
       tokenString[tokenStringIndex++] = (char) c;
     if (state == DONE)
     { tokenString[tokenStringIndex] = '\0';
       if (currentToken == ID)
         currentToken = reservedLookup(tokenString);
     }
   }
   if (TraceScan) {
     fprintf(listing,"\t%d: ",lineno);
     printToken(currentToken,tokenString);
   }
   return currentToken;
}

Note: And, below seperate code; related one also (sub-logic for expansion implementation for it):

void printToken( TokenType token, const char* tokenString )
{ switch (token)
  { case IF:
    case THEN:
    case ELSE:
    case END:
    case REPEAT:
    case UNTIL:
    case READ:
    case WRITE:
      fprintf(listing,
         "reserved word: %s\n",tokenString);
      break;
    case ASSIGN: fprintf(listing,":=\n"); break;
    case LT: fprintf(listing,"<\n"); break;
    case EQ: fprintf(listing,"=\n"); break;
    case LPAREN: fprintf(listing,"(\n"); break;
    case RPAREN: fprintf(listing,")\n"); break;
    case SEMI: fprintf(listing,";\n"); break;
    case PLUS: fprintf(listing,"+\n"); break;
    case MINUS: fprintf(listing,"-\n"); break;
    case TIMES: fprintf(listing,"*\n"); break;
    case OVER: fprintf(listing,"/\n"); break;
    case ENDFILE: fprintf(listing,"EOF\n"); break;
    case NUM:
      fprintf(listing,
          "NUM, val= %s\n",tokenString);
      break;
    case ID:
      fprintf(listing,
          "ID, name= %s\n",tokenString);
      break;
    case ERROR:
      fprintf(listing,
          "ERROR: %s\n",tokenString);
      break;
    default: /* should never happen */
      fprintf(listing,"Unknown token: %d\n",token);
  }
}

tncks / factorysetupsCompProjSunday

Code_cpp_0922_reference1 #3