// Code
#include<bits/stdc++.h>
using namespace std;
// A token as emitted by the lexer: (class tag, index inside that class's table).
using PSI = std::pair<std::string, int>;

// Keyword table: reserved word -> keyword code (1-based).
std::map<std::string, int> kt = {
    {"int", 1},
    {"void", 2},
    {"break", 3},
    {"float", 4},
    {"while", 5},
    {"do", 6},
    {"struct", 7},
    {"const", 8},
    {"case", 9},
    {"for", 10},
    {"return", 11},
    {"if", 12},
    {"default", 13},
    {"else", 14},
};

// Punctuator table: operator / delimiter spelling -> punctuator code (1-based).
std::map<std::string, int> pt = {
    {"-", 1},
    {"/", 2},
    {"(", 3},
    {")", 4},
    {"==", 5},
    {"<=", 6},
    {"<", 7},
    {"+", 8},
    {"*", 9},
    {">", 10},
    {"=", 11},
    {",", 12},
    {";", 13},
    {"++", 14},
    {"{", 15},
    {"}", 16},
};
// Bound for the `es` array declared later in the file.
const int N = 105;

// Symbol tables filled by fix(): lexeme -> 1-based index within its class.
std::map<std::string, int> id;  // identifiers        (token tag "I")
std::map<std::string, int> c2;  // class-3 constants  (token tag "C2")
std::map<std::string, int> ct;  // character constants (token tag "CT")
std::map<std::string, int> c1;  // class-2 constants  (token tag "C1")
std::map<std::string, int> st;  // string constants   (token tag "ST")

// The same lexemes in first-seen order; main() prints these lists.
std::vector<std::string> idd;
std::vector<std::string> c22;
std::vector<std::string> ctt;
std::vector<std::string> c11;
std::vector<std::string> stt;

// Last index handed out for each table (0 means the table is still empty).
int id_idx = 0;
int c1_idx = 0;
int c2_idx = 0;
int ct_idx = 0;
int st_idx = 0;

// `s` is the input line being scanned; `tmp` accumulates the current lexeme.
std::string s;
std::string tmp;

// `state` is the current DFA state (1 = start); `tp` is the class fix() should
// use for a new lexeme (1 = I, 2 = C1, 3 = C2, 4 = CT, 5 = ST).
// NOTE(review): `idx` is not referenced by the code visible here.
int idx = 0;
int state = 1;
int tp = 0;

// Cleared by main() when a lexical error is found.
bool OK = true;

// Token stream in the order the lexemes were recognised.
std::vector<PSI> Token;
// Removes one pair of matching surrounding quotes ('...' or "...") from a lexeme.
static std::string strip_quotes(const std::string& lexeme) {
    if (lexeme.size() >= 2) {
        const char first = lexeme.front();
        const char last = lexeme.back();
        if ((first == '\'' && last == '\'') || (first == '"' && last == '"')) {
            return lexeme.substr(1, lexeme.size() - 2);
        }
    }
    return lexeme;
}

// Returns the index of `key` in `table`, registering it (and appending it to
// `order`) with the next free index when it has not been seen before.
static int intern_lexeme(std::map<std::string, int>& table,
                         std::vector<std::string>& order,
                         int& counter,
                         const std::string& key) {
    int& slot = table[key];  // a zero slot means "not registered yet"
    if (slot == 0) {
        slot = ++counter;
        order.push_back(key);
    }
    return slot;
}

/**
 * Classifies the pending lexeme in the global `tmp`, appends the matching
 * token to `Token`, and clears `tmp`. Does nothing when `tmp` is empty.
 *
 * Keywords (`kt`) and punctuators (`pt`) are recognised by spelling. Anything
 * else is filed under the class selected by the global `tp`:
 *   1 -> I, 2 -> C1, 3 -> C2, 4 -> CT (quotes stripped), 5 -> ST (quotes stripped).
 *
 * Only the table that belongs to `tp` is consulted. Probing every table for
 * every lexeme made an identifier such as `a` come out as a CT token once the
 * character constant 'a' had been seen. Quotes are stripped *before* the
 * lookup so that a repeated constant reuses its existing index instead of
 * being registered a second time.
 */
void fix() {
    if (tmp.empty()) {
        return;
    }

    // find() instead of operator[] so that failed lookups do not insert
    // zero-valued junk entries into the keyword / punctuator tables.
    const auto keyword = kt.find(tmp);
    const auto punct = pt.find(tmp);

    if (keyword != kt.end() && keyword->second != 0) {
        Token.push_back({"K", keyword->second});
    } else if (punct != pt.end() && punct->second != 0) {
        Token.push_back({"P", punct->second});
    } else {
        switch (tp) {
            case 1:
                Token.push_back({"I", intern_lexeme(id, idd, id_idx, tmp)});
                break;
            case 2:
                Token.push_back({"C1", intern_lexeme(c1, c11, c1_idx, tmp)});
                break;
            case 3:
                Token.push_back({"C2", intern_lexeme(c2, c22, c2_idx, tmp)});
                break;
            case 4:
                Token.push_back({"CT", intern_lexeme(ct, ctt, ct_idx, strip_quotes(tmp))});
                break;
            case 5:
                Token.push_back({"ST", intern_lexeme(st, stt, st_idx, strip_quotes(tmp))});
                break;
            default:
                // No class selected: the lexeme is dropped without a token.
                break;
        }
    }
    tmp.clear();
}
/**
 * Converts a hexadecimal literal of the form "0x<digits>" to its value.
 *
 * The first two characters are skipped without being inspected (they are
 * expected to be the "0x" prefix). Both lowercase and uppercase hex digits are
 * accepted; conversion stops at the first character that is not a hex digit.
 *
 * @param num  the literal text, prefix included
 * @return     the numeric value; 0 when there are no digits after the prefix
 */
long long int ox16_to_d(std::string num) {
    long long int res = 0;
    for (std::size_t i = 2; i < num.length(); ++i) {
        const char ch = num[i];
        int digit = 0;
        if (ch >= '0' && ch <= '9') {
            digit = ch - '0';
        } else if (ch >= 'a' && ch <= 'f') {
            digit = ch - 'a' + 10;
        } else if (ch >= 'A' && ch <= 'F') {
            digit = ch - 'A' + 10;
        } else {
            break;  // not a hex digit: stop instead of folding garbage into the result
        }
        res = res * 16 + digit;
    }
    return res;
}
/// Returns true when `c` may start an identifier: an alphabetic character or '_'.
inline bool isaz(char c) {
    // The <cctype> classifiers require a value representable as unsigned char
    // (or EOF); a plain char may be negative for non-ASCII input bytes.
    return c == '_' || std::isalpha(static_cast<unsigned char>(c)) != 0;
}
// Key type made of a character and an integer.
using PCI = std::pair<char, int>;

// Table keyed by (character, integer) pairs.
// NOTE(review): neither `act` nor `es` is referenced by the code visible here.
std::map<PCI, int> act;

// Array of N boolean flags.
bool es[N];
/**
 * Reads one line from standard input, splits it into tokens with a
 * hand-written state machine, and prints the token stream followed by the
 * identifier / constant tables. Prints "ERROR" instead when the line contains
 * a malformed exponent or character constant, or an unterminated literal.
 *
 * States (`state`):
 *    1  start / between lexemes
 *    2  after a leading '0'            3  decimal digits
 *    4  hex digits after "0x"          5  fraction digits after '.'
 *    6  just after exponent 'e'        7  exponent sign / digits
 *    8  digits after a '.' that follows an exponent
 *    9  identifier or keyword         10  single-character punctuator
 *   11  after '='    15  after '<'    13  after '+'
 *   18  after opening '\''            21  expecting closing '\''
 *   17  after opening '"'             19  inside a string literal
 *
 * Lexemes are accumulated in `tmp` and handed to fix(), which uses `tp` to
 * pick the class of a new lexeme (1 = I, 2 = C1, 3 = C2, 4 = CT, 5 = ST).
 */
int main() {
    // Only the first line of input is analysed.
    getline(cin, s);
    // Sentinel blank: guarantees the last lexeme is followed by a delimiter,
    // so every accepting state gets the chance to flush it.
    s += " ";

    for (int i = 0; i < static_cast<int>(s.size()); i++) {
        const char e = s[i];
        const bool is_digit = (e >= '0' && e <= '9');

        if (state == 1) {
            if (e == '0') {
                state = 2;
                tmp.push_back(e);
                continue;
            }
            if (is_digit) {
                state = 3;
                tmp.push_back(e);
                continue;
            }
            if (e == ' ' || e == '\n' || e == '\t') {
                continue;
            }
            // '+', '=' and '<' may start a two-character operator, so each
            // gets a look-ahead state instead of being emitted right away.
            if (e == '+') {
                state = 13;
                tmp.push_back(e);
                continue;
            }
            if (e == '=') {
                state = 11;
                tmp.push_back(e);
                continue;
            }
            if (e == '<') {
                // Without this dispatch '<' and "<=" were silently dropped.
                state = 15;
                tmp.push_back(e);
                continue;
            }
            // find() avoids inserting an entry for every probed character; the
            // non-zero test ignores placeholder entries that operator[]
            // lookups may have left in the table.
            const auto punct = pt.find(string(1, e));
            if (punct != pt.end() && punct->second != 0) {
                state = 10;
                tmp.push_back(e);
                continue;
            }
            if (isaz(e)) {
                state = 9;
                tmp.push_back(e);
                continue;
            }
            if (e == '\'') {
                state = 18;
                tmp.push_back(e);
                continue;
            }
            if (e == '\"') {
                state = 17;
                tmp.push_back(e);
                continue;
            }
            // Any other character is skipped.
            continue;
        }

        if (state == 2) {
            if (e == 'x') {
                state = 4;
                tmp.push_back(e);
                continue;
            }
            if (is_digit) {
                state = 3;
                tmp.push_back(e);
                continue;
            }
            if (e == '.') {
                state = 5;
                tmp.push_back(e);
                continue;
            }
            tp = 2;
            state = 1;
            fix();
            i--;  // re-scan the delimiter from the start state
            continue;
        }

        if (state == 3) {
            if (is_digit) {
                tmp.push_back(e);
                continue;
            }
            if (e == '.') {
                state = 5;
                tmp.push_back(e);
                continue;
            }
            if (e == 'e') {
                state = 6;
                tmp.push_back(e);
                continue;
            }
            tp = 2;
            state = 1;
            fix();
            i--;
            continue;
        }

        if (state == 4) {
            if (is_digit || (e >= 'a' && e <= 'f')) {
                tmp.push_back(e);
                continue;
            }
            tp = 2;
            state = 1;
            fix();
            i--;
            continue;
        }

        if (state == 5) {
            if (is_digit) {
                tmp.push_back(e);
                continue;
            }
            if (e == 'e') {
                state = 6;
                tmp.push_back(e);
                continue;
            }
            tp = 3;
            state = 1;
            fix();
            i--;
            continue;
        }

        if (state == 6) {
            if (e == '+' || e == '-' || is_digit) {
                state = 7;
                tmp.push_back(e);
                continue;
            }
            // 'e' must be followed by a sign or a digit.
            OK = false;
            break;
        }

        if (state == 7) {
            if (is_digit) {
                // Keep collecting exponent digits. (This used to append the
                // control character 7 and jump to a state with no handler,
                // which discarded the rest of the line.)
                tmp.push_back(e);
                continue;
            }
            const char last = tmp.back();
            if (!(last >= '0' && last <= '9')) {
                // An exponent sign with no digit after it.
                OK = false;
                break;
            }
            if (e == '.') {
                state = 8;
                tmp.push_back(e);  // used to append the control character 8
                continue;
            }
            tp = 3;
            state = 1;
            fix();
            i--;
            continue;
        }

        if (state == 8) {
            if (is_digit) {
                tmp.push_back(e);
                continue;
            }
            tp = 3;
            state = 1;  // without this reset the loop re-read the same character forever
            fix();
            i--;
            continue;
        }

        if (state == 9) {
            if (e == '_' || isalnum(static_cast<unsigned char>(e))) {
                tmp.push_back(e);
                continue;
            }
            tp = 1;
            state = 1;
            fix();
            i--;
            continue;
        }

        if (state == 10) {
            // Single-character punctuator: emit it and re-scan this character.
            fix();
            state = 1;
            i--;
            continue;
        }

        if (state == 11 || state == 15) {
            // '=' or '<' optionally followed by '=' ("==" / "<=").
            if (e == '=') {
                tmp.push_back(e);
                fix();
                state = 1;
                continue;
            }
            fix();
            state = 1;
            i--;
            continue;
        }

        if (state == 13) {
            // '+' optionally followed by '+' ("++").
            if (e == '+') {
                tmp.push_back(e);
                fix();
                state = 1;
                continue;
            }
            fix();
            state = 1;
            i--;
            continue;
        }

        if (state == 18) {
            // The single character of a character constant.
            tmp.push_back(e);
            state = 21;
            continue;
        }

        if (state == 21) {
            if (e == '\'') {
                tmp.push_back(e);
                tp = 4;
                state = 1;
                fix();
                continue;
            }
            // More than one character between the quotes.
            OK = false;
            break;
        }

        if (state == 17 || state == 19) {
            // State 17 is handled like 19 so that the empty string "" ends at
            // its own closing quote instead of swallowing it as content.
            if (e != '\"') {
                tmp.push_back(e);
                state = 19;
                continue;
            }
            tmp.push_back(e);
            tp = 5;
            state = 1;
            fix();
            continue;
        }
    }

    // Thanks to the sentinel blank, every complete lexeme has been flushed and
    // the machine is back in state 1. Any other state means a character or
    // string literal was never closed.
    if (state != 1) {
        OK = false;
    }

    if (!OK) {
        cout << "ERROR\n";
        return 0;
    }

    cout << "Token :";
    for (const auto& tok : Token) {
        cout << "(" << tok.first << " " << tok.second << ")";
    }
    cout << "\n";

    cout << "I :";
    for (const auto& name : idd) {
        cout << name << " ";
    }
    cout << '\n';

    cout << "C1 :";
    for (const auto& lit : c11) {
        // Hexadecimal literals ("0x...") are listed by their decimal value.
        if (lit.size() > 1 && lit[1] == 'x') {
            cout << ox16_to_d(lit) << " ";
        } else {
            cout << lit << " ";
        }
    }
    cout << '\n';

    cout << "C2 :";
    for (const auto& lit : c22) {
        cout << lit << " ";
    }
    cout << '\n';

    cout << "CT :";
    for (const auto& lit : ctt) {
        cout << lit << " ";
    }
    cout << '\n';

    cout << "ST :";
    for (const auto& lit : stt) {
        cout << lit << " ";
    }
    cout << '\n';
    return 0;
}
// (end of code listing)