// Lexical analyzer for the C language
#include<iostream>
#include<cstring>
#include<cstdio>
#include<fstream>
#include<map>
#include<string>
#include<cstdlib>
#include<set>
#include<fstream>
using namespace std;
// Forward declarations for the lexer helpers defined further down in this file.
// Each token reader takes the text being scanned and the current offset i,
// returns the token text and advances i past what it consumed.
string checkstring(string filewriter,int &i);
string checkchar(string filewriter,int &i);
string checkdight(string filewriter,int &i);
string checkoperator(string filewriter,int &i);
bool checkdeadline(char ch);
bool checkletterchar(char ch);
void error();
// Must match the definition, which takes the name of the file to scan; a
// parameterless prototype would only declare an overload that is never defined.
string scanner(string filename);
// Table of reserved words, delimiters and operators.
// init() derives each entry's kind code from its position in this table, so
// entries must never be reordered or removed. The leading "" is a placeholder
// that occupies the first slot.
// The division operator is stored as "/": comments are removed before
// tokenising and checkoperator() never produces "//", so a "//" entry could
// never match while "/" would be left without a code.
string reseve[]= {"","auto","break","case","char","const","continue","default","do","double","else","enum","extern",
"float","for","goto","if","int","long","register","return","short","signed","sizeof","static","struct","switch","typedef",
"unsigned","union","void","volatile","while","main","include","{","}",
"(",")","[","]","\"","\'",
"#",":",";","<","<=",">",">=","!=","==","+","-","*","/","%","++","--"
,"&&","&","||","|","^","!","~",">>","<<","=","+=","-=","%=","*=","<<=",">>=","&=","|=","/=","^=",",","."
};
// Every character that can start a delimiter or an operator token.
// The array is not NUL-terminated; always use sizeof(deadline) for its length.
char deadline[]= {
    '?','!','%','&','(',')','*','+',',','-','.','/',':',
    ';','<','=','>','^','{','|','}','~',']','[','\'','\"'
};
// Returns true when ch is one of the characters listed in deadline.
bool checkdeadline(char ch)
{
    return memchr(deadline,ch,sizeof(deadline))!=nullptr;
}
// Reads one operator or delimiter token starting at filewriter[i].
// Longest match wins: the three-character operators are tried first, then the
// two-character ones; anything else is returned as a single character.
// On return i has been advanced past the token.
string checkoperator(string filewriter,int &i)
{
    static const char *const triples[]= {"<<=",">>="};
    static const char *const pairs[]= {
        "<=",">=","!=","==","++","--","&&","||",">>","<<",
        "+=","-=","%=","*=","&=","|=","/=","^="
    };
    int width=1;
    for(const char *op:triples)
    {
        // compare() clamps the range to the end of the text, so a short tail simply fails to match.
        if(filewriter.compare(i,3,op)==0)
        {
            width=3;
            break;
        }
    }
    if(width==1)
    {
        for(const char *op:pairs)
        {
            if(filewriter.compare(i,2,op)==0)
            {
                width=2;
                break;
            }
        }
    }
    string token=filewriter.substr(i,width);
    i+=width;
    return token;
}
// Reads a numeric literal starting at filewriter[i]: a run of decimal digits,
// optionally followed by '.' and a run of fractional digits. A '.' that is not
// followed by a digit is reported through error(). i ends just past the literal.
string checkdight(string filewriter,int &i)
{
    const auto digitat=[&filewriter](int pos)
    {
        return pos<filewriter.length()&&filewriter[pos]>='0'&&filewriter[pos]<='9';
    };
    string number;
    for(; digitat(i); i++)
        number+=filewriter[i];
    if(i<filewriter.length()&&filewriter[i]=='.')
    {
        number+='.';
        i++;
        if(!digitat(i))
            error();
        for(; digitat(i); i++)
            number+=filewriter[i];
    }
    return number;
}
// Returns true when ch is an ASCII letter (a-z or A-Z).
bool checkletterchar(char ch)
{
    const bool lower=(ch>='a'&&ch<='z');
    const bool upper=(ch>='A'&&ch<='Z');
    return lower||upper;
}
// Reads one character of a character/string literal starting at filewriter[i]
// and returns its source text; i is advanced past it.
// A backslash followed by a recognised escape is returned as one unit:
//   - simple escapes: \n \a \b \f \r \t \v \' \" \\ \?
//   - octal escapes:  a backslash and one to three octal digits (covers \0)
//   - hex escapes:    \x followed by at least one hex digit
// A backslash that does not start a recognised escape is returned on its own,
// as is any ordinary character.
string checkchar(string filewriter,int &i)
{
    const int size=static_cast<int>(filewriter.length());
    const auto hexdigit=[](char ch)
    {
        return (ch>='0'&&ch<='9')||(ch>='a'&&ch<='f')||(ch>='A'&&ch<='F');
    };
    int width=1;
    // The bounds test comes first so filewriter[i] is only read when a following character exists.
    if(i+1<size&&filewriter[i]=='\\')
    {
        const char next=filewriter[i+1];
        switch(next)
        {
        case 'n': case 'a': case 'b': case 'f': case 'r': case 't': case 'v':
        case '\'': case '\"': case '\\': case '?':
            width=2;
            break;
        case 'x':
        {
            int end=i+2;
            while(end<size&&hexdigit(filewriter[end]))
                end++;
            if(end>i+2)// "\x" with no digits is not an escape
                width=end-i;
            break;
        }
        default:
            if(next>='0'&&next<='7')
            {
                // An octal escape has at most three digits.
                int end=i+1;
                while(end<size&&end<i+4&&filewriter[end]>='0'&&filewriter[end]<='7')
                    end++;
                width=end-i;
            }
            break;
        }
    }
    string processstring=filewriter.substr(i,width);
    i+=width;
    return processstring;
}
// Collects the body of a string literal. Starting at filewriter[i] it consumes
// one character at a time through checkchar() until it reaches a double quote
// that checkchar() did not consume as part of an escape, or the end of the text.
// The closing quote itself is left for the caller.
string checkstring(string filewriter,int &i)
{
    string body;
    for(;;)
    {
        const bool atend=!(i<filewriter.length());
        if(atend||filewriter[i]=='\"')
            break;
        body+=checkchar(filewriter,i);
    }
    return body;
}
string checkkey(string filewriter,int &i)//鉴别是否是标识符
{
string processstring="";
processstring+=filewriter[i];
i++;
while(i<filewriter.length()&&((filewriter[i]=='_'||((filewriter[i]>='a'&&filewriter[i]<='z')||(filewriter[i]>='A'&&filewriter[i]<='Z')))||(filewriter[i]<='9'&&filewriter[i]>='0')))
{
processstring+=filewriter[i];
i++;
}
return processstring;
}
map<string,int>p;// maps each reserved word, delimiter and operator to its kind code
// Fills p so that every entry of reseve maps to its zero-based position.
// Zero-based numbering is what the rest of the program relies on: main reports
// '"' as 41, '\'' as 42 and '#' as 43 (their positions in reseve) and uses 81,
// the first value past the table, for identifiers. The "" placeholder in the
// first slot therefore gets 0, which main treats as "not a reserved word".
void init()
{
    int code=0;
    for(const auto &it:reseve)
    {
        p[it]=code;
        code++;
    }
}
// Returns true when filewriter names a C source file: at least three
// characters long and ending in ".c" (case-sensitive).
bool checkfile(string filewriter)
{
    const size_t len=filewriter.length();
    if(len<3)
        return false;
    return filewriter.compare(len-2,2,".c")==0;
}
// Removes comments from one physical source line and returns what is left.
// inblock carries the "inside a /* ... */ comment" state from line to line.
// Comment markers inside string or character literals are kept as ordinary text.
static string stripcommentline(const string &line,bool &inblock)
{
    string kept;
    char quote=0;// 0 outside a literal, otherwise the quote character that opened it
    for(size_t k=0; k<line.length(); k++)
    {
        const char ch=line[k];
        const char next=(k+1<line.length())?line[k+1]:'\0';
        if(inblock)
        {
            if(ch=='*'&&next=='/')
            {
                inblock=false;
                k++;// skip the '/' of the terminator
            }
            continue;
        }
        if(quote!=0)
        {
            kept+=ch;
            if(ch=='\\'&&k+1<line.length())
            {
                kept+=next;// an escaped character can never close the literal
                k++;
            }
            else if(ch==quote)
            {
                quote=0;
            }
            continue;
        }
        if(ch=='/'&&next=='/')
            break;// the rest of the line is a comment
        if(ch=='/'&&next=='*')
        {
            inblock=true;
            kept+=' ';// a comment separates tokens like a single space
            k++;// skip the '*' so that "/*/" does not close the comment
            continue;
        }
        if(ch=='\"'||ch=='\'')
            quote=ch;
        kept+=ch;
    }
    return kept;
}
// Scanning pass: reads the file named filename and returns its text with
// all // and /* */ comments removed.
// The name must end in ".c" and the file must be readable; otherwise a message
// is printed and error() is called.
// Every input line contributes its remaining text followed by '\n'. The
// newline is omitted only while a block comment is still open, so the text
// before and after a multi-line comment ends up on one line.
string scanner(string filename)
{
    if(!checkfile(filename))
    {
        cout<<"这不是c文件"<<endl;
        error();
    }
    ifstream inf(filename);
    if(!inf)
    {
        cout<<"无法打开文件"<<endl;
        error();
    }
    string line;
    string processstring;
    bool inblock=false;
    while(getline(inf,line))
    {
        // Drop the '\r' that getline leaves behind on CRLF files.
        if(!line.empty()&&line[line.length()-1]=='\r')
            line.erase(line.length()-1);
        processstring+=stripcommentline(line,inblock);
        if(!inblock)
            processstring+='\n';
    }
    return processstring;
}
// Reports a fatal error on standard output and terminates the program.
// The exit status is EXIT_FAILURE so that callers (shells, scripts, build
// tools) can tell a failed run from a successful one.
void error()
{
    cout<<"error"<<endl;
    exit(EXIT_FAILURE);
}
int main()
{
init();
string filewriter,processstring;
filewriter=scanner("inii.c");
ofstream onf;
onf.open("Resultfile.c");
int i=0;
cout<<"---扫描程序之后的程序---"<<endl;
cout<<filewriter<<endl;
cout<<"---单词---种类编码---单词种类---"<<endl;
onf<<"---扫描程序之后的程序---"<<endl;
onf<<filewriter<<endl;
onf<<"---单词---种类编码---单词种类---"<<endl;
while(i<filewriter.length())
{
while(i<filewriter.length()&&(filewriter[i]=='\n'||filewriter[i]==' '||filewriter[i]=='\t'))//过滤空白符
i++;
if(i>=filewriter.length())//判断过滤完空白符后已经到文件末尾
break;
if(filewriter[i]=='\"')//判断为字符串常量
{
cout<<"("<<'\"'<<","<<41<<","<<"界符)"<<endl;
onf<<"("<<'\"'<<","<<41<<","<<"界符)"<<endl;
i++;
processstring=checkstring(filewriter,i);
cout<<"("<<processstring<<","<<83<<","<<"字符串常量)"<<endl;
onf<<"("<<processstring<<","<<83<<","<<"字符串常量)"<<endl;
if(i<filewriter.length()&&filewriter[i]=='\"')
{
cout<<"("<<'\"'<<","<<41<<","<<"界符)"<<endl;
onf<<"("<<'\"'<<","<<41<<","<<"界符)"<<endl;
i++;
}
else
{
error();
}
}
else if(filewriter[i]=='\'')//判断是否为字符常量
{
cout<<"("<<'\''<<","<<42<<","<<"界符)"<<endl;
onf<<"("<<'\''<<","<<42<<","<<"界符)"<<endl;
i++;
processstring=checkchar(filewriter,i);
cout<<"("<<processstring<<","<<84<<","<<"字符常量)"<<endl;
onf<<"("<<processstring<<","<<84<<","<<"字符常量)"<<endl;
if(i<filewriter.length()&&filewriter[i]=='\'')
{
cout<<"("<<'\''<<","<<42<<","<<"界符)"<<endl;
onf<<"("<<'\''<<","<<42<<","<<"界符)"<<endl;
i++;
}
else
{
error();
}
}
else if(filewriter[i]<='9'&&filewriter[i]>='0')//判断是否为数字常量
{
processstring=checkdight(filewriter,i);
cout<<"("<<processstring<<","<<82<<","<<"数字常量)"<<endl;
onf<<"("<<processstring<<","<<82<<","<<"数字常量)"<<endl;
}
else if(checkdeadline(filewriter[i]))//判断是否为运算符或者界符
{
processstring=checkoperator(filewriter,i);
if(p[processstring]<=42&&p[processstring]>=35)
{
cout<<"("<<processstring<<","<<p[processstring]<<","<<"界符)"<<endl;
onf<<"("<<processstring<<","<<p[processstring]<<","<<"界符)"<<endl;
}
else
{
cout<<"("<<processstring<<","<<p[processstring]<<","<<"运算符)"<<endl;
onf<<"("<<processstring<<","<<p[processstring]<<","<<"运算符)"<<endl;
}
}
else if(filewriter[i]=='_'||checkletterchar(filewriter[i]))//判断是否为标识符或者关键字
{
processstring=checkkey(filewriter,i);
if(p[processstring]!=0)
{
cout<<"("<<processstring<<","<<p[processstring]<<","<<"关键字)"<<endl;
onf<<"("<<processstring<<","<<p[processstring]<<","<<"关键字)"<<endl;
}
else
{
cout<<"("<<processstring<<","<<81<<","<<"标识符)"<<endl;
onf<<"("<<processstring<<","<<81<<","<<"标识符)"<<endl;
}
}
else if(filewriter[i]=='#')//特殊字符
{
processstring="";
while(i<filewriter.length()&&(filewriter[i]!='\n'))
processstring+=filewriter[i],i++;
cout<<"("<<processstring<<","<<43<<","<<"宏定义)"<<endl;
onf<<"("<<processstring<<","<<43<<","<<"宏定义)"<<endl;
i++;
}
else
{
error();
}
}
return 0;
}
// (web page footer, not part of the program) 浙公网安备 33010602011771号