实验一 《实现C语言小子集程序的词法分析》实验要求

以表1的小语言为例实现词法分析

 

设计单词属性值,各类表格(表示标识符表、常量表),单词符号及机内表示
编码实现词法分析程序

采用文本输入和输出的方式。程序从名为“test.txt”的文件中读入代码,将词法分析的结果保存到“output.txt”中。

要求实现:

(1)对正确源程序的识别;

(2)对包含有注释//和/* */的源程序的识别;

(3)对包含错误标识符的源程序的识别。

 

测试样例:test.txt

int i = 3;

int 5j = 10;

int m = max(i, j);

while(i<m) do

{

i = i+ 1;

}

 

void max(int x, int y)

{

int 3temp = 0;

if(x > y)

temp = x;

else

temp = y;

return temp;

}


输出:output.txt


<26,->, <1,i>, <16,->, <2,3>, <23,->,

<26,->, LexicalError, <16,->, <2,10>, <23,->,

<26,->, <1,m>, <16,->, <1,max>, <17,->, <1,i>, <24,->, <1,j>, <18,->, <23,->,

<31,->, <17,->, <1,i>, <8,->, <1,m>, <18,->, <32,->,

<21,->,

<1,i>, <16,->, <1,i>, <3,->, <2,1>, <23,->,

<22,->,

<25,->, <1,max>, <17,->, <26,->, <1,x>, <24,->, <26,->, <1,y>, <18,->,

<21,->,

<26,->, LexicalError, <16,->, <2,0>, <23,->,

<29,->, <17,->, <1,x>, <10,->, <1,y>, <18,->,

<1,temp>, <16,->, <1,x>, <23,->,

<30,->,

<1,temp>, <16,->, <1,y>, <23,->,

<33,->, <1,temp>, <23,->,

<22,->,

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BASIZE 20
#define EOS '\0'
#define True 1
#define False 0

#define key_num 9 // number of entries in keyWord
#define buff_size 1024

char buff[buff_size]; // holds the source text read from the input file
int lineno = 1; // current source line, advanced by lexan on each newline
int tokenval = 1; // value of the integer constant most recently scanned by lexan
char lexbuf[BASIZE]; // text of the identifier/keyword currently being scanned
int error[10]; // line numbers on which lexan reported a lexical error

// reserved words of the language
char keyWord[key_num][10]={
"void","int","float","char","if","else",
"while","do","return"
};

 

//数字判断
int isDigit(char ch)
{
    /* True for the ASCII decimal digits '0'..'9', False for anything else. */
    return ('0' <= ch && ch <= '9') ? True : False;
}


//字母判断
int isAlpha(char ch)
{
    /* Lower-case ASCII letter. */
    if ('a' <= ch && ch <= 'z') {
        return True;
    }
    /* Upper-case ASCII letter. */
    if ('A' <= ch && ch <= 'Z') {
        return True;
    }
    return False;
}


//操作符
int isOperator(char ch)
{
    /* True when ch can start an operator token: + - * / > < ! = */
    switch (ch) {
    case '+':
    case '-':
    case '*':
    case '/':
    case '>':
    case '<':
    case '!':
    case '=':
        return True;
    default:
        return False;
    }
}


//分隔符
int isDelimater(char ch)
{
    /* True for the delimiter characters ( ) [ ] { } ; and , */
    switch (ch) {
    case '(':
    case ')':
    case '[':
    case ']':
    case '{':
    case '}':
    case ';':
    case ',':
        return True;
    default:
        return False;
    }
}

 


/×
词发分析器
@auth: finch
@time: 2017/4/1
×/

int lexan()
{
int error_num = 0;
FILE *fp_in=NULL;
FILE *fp_out=NULL;

if((fp_in=fopen("test.txt","r"))!=NULL) //读取源码进入缓存buff中
{
char ch=NULL;
int x =0;
while(ch!=EOF) //文件结束符
{
printf("%d\n",x);
ch=fgetc(fp_in);
buff[x]= ch;
x++;
if(x>=buff_size)
{

printf("Error");
exit(-1);
}
}
buff[x]=EOF;
fclose(fp_in);
}
else
{
printf("Error:can't find the file");
exit(-1);
}


if((fp_out=fopen("output.txt","w+"))==NULL) //词法分析的输出
{
printf("Error:can't create a file");
exit(-1);
}


int t=0;
while(buff[t]!=EOF) //读读入内存的源码进行分析
{
if (buff[t] ==' '||buff[t]=='\t')
; //删除空格
else if(buff[t]== '\n') //换行
{

fprintf(fp_out,"\n");
printf("\n");
lineno = lineno +1; //行数

}
else if(isDigit(buff[t])) //数字
{
tokenval = buff[t] - '0';
t++;
while(isDigit(buff[t])) //整数
{
tokenval = tokenval *10 + buff[t] -'0';
t++;
}
if(isAlpha(buff[t])) //数字+字母 :报错
{
fprintf(fp_out," LexicalError,");
error[error_num++]=lineno; //出错行记录
printf("<error:%d,%c>",tokenval,buff[t]);
}
else
{
t--;
fprintf(fp_out,"<%d,->,",2);
printf("<%d:%d>,",2,tokenval);
}

}
else if(True==isAlpha(buff[t])||buff[t]=='_') //标识符或者关键字
{

int b = 0;
while(isAlpha(buff[t])||isDigit(buff[t])||buff[t]=='_') //字母或数字
{
lexbuf[b] = buff[t];
t++;
b = b +1;
if(b >=BASIZE)
printf("compiler error");
}
t--;
lexbuf[b] ='\0';
if(True==isKeyWord(lexbuf)) //关键字判断
{
int code = 2;
if(0==strcmp(lexbuf,"void")) //查找对应代码
code =25;
else if(0==strcmp(lexbuf,"int"))
code =26;
else if(0==strcmp(lexbuf,"float"))
code =27;
else if(!strcmp(lexbuf,"char"))
code =28;
else if(!strcmp(lexbuf,"if"))
code =29;
else if(!strcmp(lexbuf,"else"))
code =30;
else if(!strcmp(lexbuf,"while"))
code =31;
else if(!strcmp(lexbuf,"do"))
code =32;
else if(!strcmp(lexbuf,"return"))
code =33;
else if(!strcmp(lexbuf,"main"))
code =34;
else if(!strcmp(lexbuf,"printf"))
code =35;
fprintf(fp_out,"<%d,->,",code);
printf("<%d:%s>,",code,lexbuf);
}
else
{ //标识符输出

fprintf(fp_out,"<%d,%s>,",1,lexbuf);
printf("<%d:%s>,",1,lexbuf);
}

}
else if(isDelimater(buff[t])) //分隔符
{
int code = 0;
if(buff[t]==',')
code=24;
else if(buff[t]==';')
code=23;
else if(buff[t]=='{')
code=21;
else if(buff[t]=='}')
code=22;
else if(buff[t]=='(')
code=17;
else if(buff[t]==')')
code=18;
else if(buff[t]=='[')
code=19;
else if(buff[t]==']')
code=20;
fprintf(fp_out,"<%d,->,",code);
printf("<%d,%c>,",code,buff[t]);

}
else if(isOperator(buff[t])) //运算符
{
int code = 0;
if(buff[t]=='+')
code=3;
else if(buff[t]=='-')
code=4;
else if(buff[t]=='*')
code=5;
else if(buff[t]=='/')
{
t++;
if(buff[t]=='*') //多行注释判断
{
while(buff[t]!='/')
{
t++; //忽略注释
}
}
else if(buff[t]=='/') //单行注释
{
while(buff[t]!='\n')
{

t++;
}
}else{
code = 4;
t--;
}

}

else if(buff[t]=='<')
{
t++;
if(buff[t]=='=') //==
{

}
else
{
t--;
code = 8; // <
}
}

else if(buff[t]=='>')
{
t++;
if(buff[t]=='=') // >=
{
code =11;
}
else{
code=10;
t--;
}
}

else if(buff[t]=='=')
{
t++;
if(buff[t]=='=') // ==
{
code =12;
}
else{
code=16; // =
t--;
}
}

fprintf(fp_out,"<%d,->,",code);
printf("<%d,%c>",code,buff[t]);
}
t++;
}

if(error_num!=0) //错误输出
{
fprintf(fp_out,"LexicalError(s) on line(s) ");
for(int i=0;i<error_num;i++)
if(i!=error_num)
fprintf(fp_out,"%d,",error[i]);
else
fprintf(fp_out,"%d",error[i]);

}
fclose(fp_out);
return 0;
}


/* Program entry point: runs the lexical analyzer once and exits with 0. */
int main()
{
    (void)lexan(); /* the analyzer's return value is not used */
    return 0;
}