编译原理 - 词法分析器

样例输入:if num > 100 then num2 = 100 else num2 = 0 ; #

样例输出:

 

#include "cctype"
#include "climits"
#include "cstdio"
#include "cstring"

// Token codes returned by scan().

// Special results.
const int ERROR = -1 ;  // unrecognised character, or ':' not followed by '='
const int OVER = 0 ;    // the end-of-input marker '#' was read

// Keywords. Each code is the keyword's index in the keywords table plus one,
// which is how scan() derives it.
const int BEGIN = 1;
const int END = 2;
const int IF = 3;
const int THEN = 4;
const int WHILE = 5;
const int DO = 6;
const int CONST = 7;
const int VAR = 8;
const int CALL = 9 ;
const int PROCEDURE = 10;

// User-defined identifiers and integer constants.
const int ID = 11;
const int NUMBER = 12;

// Operators and delimiters.
const int PLUS = 13;       // +
const int SUB =14;         // -
const int STAR = 15;       // *
const int DIV = 16;        // /
const int MOD = 17;        // %
const int EQUAL = 18;      // =
const int NE =19;          // <>
const int LESS = 20;       // <
const int MORE = 21;       // >
const int LE = 22;         // <=
const int ME = 23;         // >=
const int ASSIGN = 24;     // :=
const int LPAR = 25;       // (
const int RPAR = 26;       // )
const int COMMA = 27;      // ,
const int DOT = 28;        // .
const int SEMICOLON = 29;  // ;

// Shared scanner state.
char buffer[200]= {'\0'};  // input text; filled by main(), read by the scanner
char str[20];              // spelling of the token most recently scanned
char ch;                   // character most recently read from buffer

int n;               // not referenced by any code shown in this file
int number;          // value of the integer constant most recently scanned
int pstr,pbuffer;    // write index into str, read index into buffer

// Keyword table. scan() compares identifiers against these entries and
// returns (index + 1) on a match.
int keyWordNum=10; // number of entries in keywords
char keywords [10][10] = {"begin","end","if","then","while","do","const","var","call","procedure"};

// Constant table: distinct integer constants, filled by insert_Num().
int numOfNumbers=0;  // number of entries in use
int numtab[100];

// Variable table: distinct identifier names, filled by insert_Var().
int numOfVars=0;     // number of entries in use
char vartab[100][10];

// Reads the next non-whitespace character of buffer into ch.
//
// All whitespace (spaces, tabs, newlines, ...) is skipped, so input spread
// over several lines is not reported as a stream of errors. If the read
// index has reached the end of buffer, ch is set to '\0' and the index is
// left unchanged instead of reading past the array.
void getBC()
{
    const int limit = (int)sizeof(buffer);

    do
    {
        ch = (pbuffer >= 0 && pbuffer < limit) ? buffer[pbuffer++] : '\0';
    }
    // The cast keeps isspace() defined for characters with a negative value.
    while (isspace((unsigned char)ch));
}

// Un-reads one character: moves the read index of buffer back by one so the
// look-ahead character is seen again by the next read.
//
// After the call ch holds the last character that is still consumed (or
// '\0' when nothing is). The index is never moved below zero.
void retract()
{
    if (pbuffer > 0)
    {
        --pbuffer;
    }

    const int limit = (int)sizeof(buffer);
    ch = (pbuffer > 0 && pbuffer <= limit) ? buffer[pbuffer - 1] : '\0';
}

//对缓冲区的内容进行扫描
int scan()
{
    //初始化字符串
    pstr=0;
    for(int i=0; i<20; ++i)
    {
        str[i]='\0';
    }

    //初始化数字常量
    number=0;

    getBC();

    if( isalpha(ch) )
    {
        //读取标识符
        while( isalpha(ch) || isdigit(ch) )
        {
            str[pstr++]=ch;
            ch = buffer[pbuffer++];
        }
        str[pstr++]='\0';
        int syn=11;

        retract();//回退

        //判断是否是关键字
        for(int i=0; i<keyWordNum; ++i)
        {
            if(strcmp(str,keywords[i])==0)    //字符串的比较
            {
                syn=i+1;
                break;
            }
        }
        return syn;
    }
    else if( isdigit(ch) )
    {
        while( isdigit(ch) )
        {
            number = number*10 + ch-'0';
            ch = buffer[pbuffer++];
        }
        retract();//回退
        return NUMBER;
    }
    else
    {
        switch(ch)
        {
        case'+':
            //syn=13;
            str[0]=ch;
            return PLUS;
        case'-':
            //syn=14;
            str[0]=ch;
            return SUB;
        case'*':
            //syn=15;
            str[0]=ch;
            return STAR;
        case'/':
            //syn=16;
            str[0]=ch;
            return DIV;
        case'%':
            str[0]=ch;
            return MOD;
        case'=':
            str[0]=ch;
            return EQUAL;
        case'<':
            pstr=0;
            str[pstr++]=ch;
            ch=buffer[pbuffer++];
            if(ch=='>')
            {
                str[pstr++]=ch;
                return NE;
            }
            else if(ch=='=')
            {

                str[pstr++]=ch;
                return LE;
            }
            else
            {
                retract();
                return LESS;
            }
            break;
        case'>':
            pstr=0;
            str[pstr++]=ch;
            ch=buffer[pbuffer++];
            if(ch=='=')
            {
                str[pstr++]=ch;
                return ME;
            }
            else
            {
                retract();
                return MORE;
            }
            break;
        case':':
            pstr=0;
            str[pstr++]=ch;
            ch=buffer[pbuffer++];
            if(ch=='=')
            {
                str[pstr++]=ch;
                return ASSIGN;
            }
            else
            {
                retract();
                return ERROR;
            }
            break;
        case'(':
            str[0]=ch;
            return LPAR;
        case')':
            str[0]=ch;
            return RPAR;
        case',':
            str[0]=ch;
            return COMMA;
        case'.':
            str[0]=ch;
            return DOT;
        case';':
            str[0]=ch;
            return SEMICOLON;
        case'#':
            str[0]=ch;
            return OVER;
        default:
            return ERROR;
        }
    }


}

// Looks up an identifier in the variable table, adding it if it is absent.
//
// str: NUL-terminated identifier name.
// Returns the 1-based position of the name in vartab, or 0 when the name is
// new and the table is already full.
//
// A table row holds at most sizeof(vartab[0]) - 1 characters, so only that
// many leading characters of a name are stored and compared; longer names
// that share this prefix map to the same entry.
int insert_Var(const char * str)
{
    const size_t maxLen = sizeof(vartab[0]) - 1;
    const int capacity = (int)(sizeof(vartab) / sizeof(vartab[0]));

    for (int i = 0; i < numOfVars; i++)
    {
        // Stored names never exceed maxLen characters, so a bounded compare
        // matches exactly what a truncated insert would have stored.
        if (strncmp(str, vartab[i], maxLen) == 0)
        {
            return i + 1;
        }
    }

    if (numOfVars >= capacity)
    {
        return 0;  // no free row left; do not write past the table
    }

    size_t len = strlen(str);
    if (len > maxLen)
    {
        len = maxLen;
    }
    memcpy(vartab[numOfVars], str, len);
    vartab[numOfVars][len] = '\0';

    return ++numOfVars;
}

// Looks up an integer constant in the constant table, adding it if absent.
//
// number: value of the constant.
// Returns the 1-based position of the value in numtab, or 0 when the value
// is new and the table is already full.
int insert_Num(int number)
{
    const int capacity = (int)(sizeof(numtab) / sizeof(numtab[0]));

    for (int i = 0; i < numOfNumbers; i++)
    {
        if (numtab[i] == number)
        {
            return i + 1;
        }
    }

    if (numOfNumbers >= capacity)
    {
        return 0;  // no free slot left; do not write past the table
    }

    numtab[numOfNumbers] = number;
    return ++numOfNumbers;
}


main()
{
    freopen("in.txt","r",stdin);//从文件读入数据

    printf("\n\nThe explanation :\n"
           "1. 1 to 10 : Keyword\n"
           "2. 11 : Other indicators by user\n"
           "3. 12 : Numbers\n"
           "4. 13 to 29 : Operators\n");


    printf("\nPlease input string:\n");

    //输入到缓冲区
    pbuffer=0;
    do
    {
        ch=getchar();
        buffer[pbuffer++]=ch;
    }
    while(ch!='#');

    //词法分析
    pbuffer=0;
    int loc,syn;
    do
    {
        syn = scan();
        switch(syn)
        {
        case -1:
            printf(" ERROR\n");
            break;
        case 12:
            //常数
            loc = insert_Num(number);
            printf("( %2d,%2d ) %d\n",syn,loc,number);
            break;
        case 11:
            //用户变量
            loc = insert_Var(str);
            printf("( %2d,%2d ) %s\n",syn,loc,str);
            break;
        default:
            //关键字
            printf("( %2d, - ) %s\n",syn,str);
        }
    }
    while(syn!=0);

    return 0;
}

  

 

posted @ 2016-04-12 11:46  纸牌  阅读(250)  评论(0编辑  收藏  举报