编译原理 实验 简单的词法分析器

代码

#include<bits/stdc++.h>
using namespace std;

// A token is a (class tag, index) pair, e.g. ("K", 1) or ("I", 3).
using PSI = pair<string, int>;

// Keyword spellings mapped to their keyword codes (emitted with tag "K").
map<string, int> kt = {
    {"int", 1},
    {"void", 2},
    {"break", 3},
    {"float", 4},
    {"while", 5},
    {"do", 6},
    {"struct", 7},
    {"const", 8},
    {"case", 9},
    {"for", 10},
    {"return", 11},
    {"if", 12},
    {"default", 13},
    {"else", 14},
};

// Punctuator/operator spellings mapped to their codes (emitted with tag "P").
map<string, int> pt = {
    {"-", 1},
    {"/", 2},
    {"(", 3},
    {")", 4},
    {"==", 5},
    {"<=", 6},
    {"<", 7},
    {"+", 8},
    {"*", 9},
    {">", 10},
    {"=", 11},
    {",", 12},
    {";", 13},
    {"++", 14},
    {"{", 15},
    {"}", 16},
};

// Size constant for fixed-size arrays declared in this file.
const int N = 105;

// Symbol tables filled by fix(): lexeme -> 1-based index in its class.
map<string, int> id;  // identifiers            (tag "I")
map<string, int> c2;  // real constants         (tag "C2")
map<string, int> ct;  // character constants    (tag "CT")
map<string, int> c1;  // integer constants      (tag "C1")
map<string, int> st;  // string constants       (tag "ST")

// The same lexemes in first-seen order; main() prints these lists.
vector<string> idd;
vector<string> c22;
vector<string> ctt;
vector<string> c11;
vector<string> stt;

// Last index handed out in each symbol table (0 means the table is empty).
int id_idx = 0;
int c1_idx = 0;
int c2_idx = 0;
int ct_idx = 0;
int st_idx = 0;

string s;    // the input line being scanned
string tmp;  // the lexeme collected so far

int idx = 0;    // not referenced by the code visible in this file
int state = 1;  // current automaton state; 1 is the start state
int tp = 0;     // class of the pending lexeme: 1=I, 2=C1, 3=C2, 4=CT, 5=ST

bool OK = true;  // cleared when the scanner meets a malformed lexeme

vector<PSI> Token;  // the token sequence in input order
void fix() {
    if (tmp=="")
        return ;
    if (kt[tmp]) {
        Token.push_back({"K",kt[tmp]});
    }
    else if (pt[tmp]) {
        Token.push_back({"P",pt[tmp]});
    }
    else if (id[tmp]){
        Token.push_back({"I",id[tmp]});
    }
    else if (c1[tmp]) {
        Token.push_back({"C1",c1[tmp]});
    }
    else if (c2[tmp]) {
        Token.push_back({"C2",c2[tmp]});
    }
    else if (ct[tmp]) {
        if ((tmp[0] =='\'' && tmp[tmp.size()-1]=='\'') ||(tmp[0] =='\"' && tmp[tmp.size()-1]=='\"') ) {
            string d;
            for (int i = 1; i< tmp.size()-1;i++) {
                d.push_back(tmp[i]);
            }
            tmp = d;
        }
        Token.push_back({"CT",ct[tmp]});
    }
    else if (st[tmp]) {
        if ((tmp[0] =='\'' && tmp[tmp.size()-1]=='\'') ||(tmp[0] =='\"' && tmp[tmp.size()-1]=='\"') ) {
            string d;
            for (int i = 1; i< tmp.size()-1;i++) {
                d.push_back(tmp[i]);
            }
            tmp = d;
        }
        Token.push_back({"ST",st[tmp]});
    }
    else {
          if (tp==1) {
                id[tmp]= ++id_idx;
                Token.push_back({"I",id[tmp]});
                idd.push_back(tmp);
            }
         else   if (tp==2)
        {
                c1[tmp] = ++c1_idx;
                Token.push_back({"C1",c1[tmp]});
             c11.push_back(tmp);
            }
    else if (tp==3) {
                c2[tmp] = ++c2_idx;
                Token.push_back({"C2",c2[tmp]});
                c22.push_back(tmp);
    }
            else if (tp==4) {
                if ((tmp[0] =='\'' && tmp[tmp.size()-1]=='\'') ||(tmp[0] =='\"' && tmp[tmp.size()-1]=='\"') ) {
                    string d;
                    for (int i = 1; i< tmp.size()-1;i++) {
                        d.push_back(tmp[i]);
                    }
                    tmp = d;
                }
                ct[tmp] = ++ct_idx;
                Token.push_back({"CT",ct[tmp]});
                ctt.push_back(tmp);
            }
            else if (tp== 5) {
                if ((tmp[0] =='\'' && tmp[tmp.size()-1]=='\'') ||(tmp[0] =='\"' && tmp[tmp.size()-1]=='\"') ) {
                    string d;
                    for (int i = 1; i< tmp.size()-1;i++) {
                        d.push_back(tmp[i]);
                    }
                    tmp = d;
                }
                st[tmp] = ++st_idx;
                Token.push_back({"ST",st[tmp]});
                stt.push_back(tmp);
            }
    }
    tmp = "";
}
// Converts a hexadecimal literal such as "0x1f" to its numeric value.
//
// The first two characters (the "0x" prefix) are skipped without being
// inspected. The remaining characters may be '0'-'9', 'a'-'f' or 'A'-'F';
// conversion stops at the first character that is not a hex digit. A string
// with nothing after the prefix yields 0. No overflow checking is done.
long long int ox16_to_d(std::string num) {
    long long int res = 0;
    for (std::size_t i = 2; i < num.size(); ++i) {
        const char c = num[i];
        int digit;
        if (c >= '0' && c <= '9')
            digit = c - '0';
        else if (c >= 'a' && c <= 'f')
            digit = c - 'a' + 10;
        else if (c >= 'A' && c <= 'F')
            digit = c - 'A' + 10;
        else
            break;  // not a hex digit: stop rather than fold garbage into res
        res = res * 16 + digit;
    }
    return res;
}
// Returns true when `c` may start an identifier: a letter or an underscore.
// The character is converted to unsigned char before classification, because
// passing a negative value (e.g. a byte of a multi-byte character on
// platforms where char is signed) to isalpha is undefined behaviour.
inline bool isaz(char c) {
    return c == '_' || std::isalpha(static_cast<unsigned char>(c)) != 0;
}
// (character, int) pair type used as the key of `act`.
using PCI = pair<char, int>;

// Neither `act` nor `es` is referenced by the code visible in this file.
map<PCI, int> act;
bool es[N];
int main() {
    // while (cin>>tmp) {
    //     if (tmp =="$")
    //         break;
    //     s +=" " + tmp + " ";
    // }
    getline(cin,s);
    s +=" ";
    for (int i = 0; i< s.size(); i++) {
        auto e = s[i];
        if (state==1) {
            if (e == '0') {
                state=2;
                tmp.push_back('0');
                continue;
            }
            if (e >='1' && e<='9') {
                state = 3;
                tmp.push_back(e);
                continue;
            }
            if (e==' ' || e=='\n' || e=='\t') {
                continue;
            }
            string g;
            g.push_back(e);
            if (pt[g] && g !="+" && g!="=" && g!="<") {
                state= 10;
                tmp.push_back(e);
                continue;
            }
            if (isaz(e)) {
                state = 9;
                tmp.push_back(e);
                continue;
            }
            if (e=='+') {
                state = 13;
                tmp.push_back(e);
                continue;
            }
            if (e=='=') {
                state=11;
                tmp.push_back(e);
                continue;
            }
            if (e=='\'') {
                state = 18;
                tmp.push_back(e);
                continue;
            }
            if (e=='\"') {
                state = 17;
                tmp.push_back(e);
                continue;
            }
            state = 1;
            continue;
        }
        if (state == 2) {
            if (e=='x') {
                state = 4;
                tmp.push_back(e);
                continue;
            }
            if (e<='9' && e>='0') {
                state = 3;
                tmp.push_back(e);
                continue;
            }
            if (e=='.') {
                state = 5;
                tmp.push_back(e);
                continue;
            }
            tp=2;
            state =1;
            fix();
            i--;
            continue;
        }
        if (state == 3) {
            if (e>='0' && e<='9') {
                tmp.push_back(e);
                continue;
            }
            if (e=='.') {
                state = 5;
                tmp.push_back(e);
                continue;
            }
            if (e=='e') {
                state = 6;
                tmp.push_back(e);
                continue;
            }
            tp = 2;
            state = 1;
            fix();
            i--;
            continue;
        }
        if (state == 4)
        {
            if ((e>='0' && e<='9' )||(e>='a' && e<='f')) {
                tmp.push_back(e);
                continue;
            }
            tp = 2;
            fix();
            state = 1;
            i--;
            continue;
        }
        if (state == 5) {
            if (e >='0' &&e<='9') {
                state = 5;
                tmp.push_back(e);
                continue;
            }
            if (e=='e') {
                state = 6;
                tmp.push_back(e);
                continue;
            }
            tp = 3;
            state = 1;
            fix();
            i--;
            continue;
        }
        if (state== 6) {
            if (e=='+' || e=='-' ||(e>='0' &&e<='9')) {
                state = 7;
                tmp.push_back(e);
                continue;
            }
            OK = false;
            break;
        }
        if (state == 7) {
            if ((e>='0'&&e<='9')) {
                state = 23;
                tmp.push_back(7);
                continue;
            }
            if (e=='.') {
                state = 8;
                tmp.push_back(8);
                continue;
            }
            if (!isdigit(tmp[tmp.size()-1])) {
                OK= false;
                break;
            }
            state = 1;
            tp = 3;
            i--;
            fix();
            continue;
        }
        if (state == 8) {
            if (e>='0' && e<='9') {
                tmp.push_back(e);
                continue;
            }
            tp = 3;
            i--;
            fix();
            continue;
        }
        if (state == 9) {
            if (e=='_'|| isalnum(e)) {
                tmp.push_back(e);
                continue;
            }
            state = 1;
            tp = 1;
            fix();
            i--;
            continue;
        }
        if (state == 10) {
            fix();
            i--;
            state = 1;
            continue;
        }
        if (state == 11 || state == 15) {
            if (e=='=') {
                tmp.push_back(e);
                fix();
                state = 1;
                continue;
            }
            i--;
            fix();
            state = 1;
            continue;
        }
        if (state == 13) {
            if (e =='+') {
                tmp.push_back(e);
                fix();
                state = 1;
                continue;
            }
            i--;
            fix();
            state = 1;
            continue;
        }
        if (state == 18) {
            tmp.push_back(e);
            state = 21;
            continue;
        }
        if (state == 17) {
            tmp.push_back(e);
            state = 19;
            continue;
        }
        if (state == 21) {
            if (e=='\'') {
                tmp.push_back(e);
                tp = 4;
                state = 1;
                fix();
                continue;
            }
            OK = false;
            break;
        }
        if (state == 19) {
            if (e  !='\"') {
                tmp.push_back(e);
                continue;
            }
            tmp.push_back(e);
            tp = 5;
            fix();
            state = 1;
            continue;
        }
    }
    if (!OK) {
        cout<<"ERROR\n";
        return 0;
    }
    cout<<"Token :";
    for (auto e : Token) {
        cout<<"("<<e.first<<" "<<e.second<<")";
    }
    cout<<"\n";
    cout<<"I :";
    for (auto e : idd) {
        cout<<e<<" ";
    }
    cout<<endl;

    cout<<"C1 :";
    for (auto e : c11) {
        if (e.size()>1 && e[1]=='x')
        cout<<ox16_to_d(e)<<" ";
        else
            cout<<e<<" ";
    }

    cout<<endl;

    cout<<"C2 :";
    for (auto e : c22) {
        cout<<e<<" ";
    }
    cout<<endl;

    cout<<"CT :";
    for (auto e : ctt) {
        cout<<e<<" ";
    }
    cout<<endl;
    cout<<"ST :";
    for (auto e : stt) {
        cout<<e<<" ";
    }
    cout<<endl;
    return 0;
}

``
posted @ 2025-05-14 22:11  Guaninf  阅读(28)  评论(0)    收藏  举报