C语言词法分析:C#源码

今天继续研究代码解析的算法
这个是算法流程图

 

有图解可能更直观一点;
以下是C#源码:

  using System;
  using System.Collections;
  using System.Globalization;
  using System.IO;
  using System.Text;
  using System.Windows.Forms;

  7namespace CodeFormatter {
  8  /// <summary>
  9  /// CodeFormatterFactory 的摘要说明。
 10  /// c 代码解析,不支持中文
 11  /// </summary>

 12  public class CodeFormatterFactory {
 13    /*源代码*/
 14    private string sourceCode = "";
 15
 16    /*C语言所有关键字,共32个*/
 17    ArrayList KeyWordList = new ArrayList();
 18
 19    /*运算、限界符*/
 20    ArrayList LimitList = new ArrayList();
 21
 22    /*常量表*/
 23    ArrayList ConstList = new ArrayList();
 24
 25    /*标识符*/
 26    ArrayList IdentifierList = new ArrayList();
 27
 28    /*输出*/
 29    ArrayList OutputList = new ArrayList();
 30
 31    public CodeFormatterFactory() {
 32      //
 33      // TODO: 在此处添加构造函数逻辑
 34      //
 35      init();
 36    }

 37
 38    public string SourceCode{
 39      get{return this.sourceCode;}
 40      set{this.sourceCode =value;}
 41    }

 42
 43    public string ParseMessages{
 44      get{
 45        string pm = "";
 46
 47        IEnumerator ie = this.OutputList.GetEnumerator();
 48        while ( ie.MoveNext() )
 49          pm += ie.Current.ToString() + "\r\n";
 50        return pm;
 51      }

 52    }

 53
 54    private void init() {
 55      /*C语言所有关键字,共32个*/
 56      string[] key=new string[]{" ","auto","break","case","char","const","continue","default","do","double",
 57                                 "else","enum","extern","float","for","goto","if","int","long","register",
 58                                 "return","short","signed","sizeof","static","struct","switch","typedef",
 59                                 "union","unsigned","void","volatile","while"}
;
 60      /*运算、限界符*/
 61      string[] limit=new string[]{" ","(",")","[","]","->",".","!","++","--","&","~",
 62                                   "*","/","%","+","-","<<",">>","<","<=",">",">=","==","!=","&&","||",
 63                                   "=","+=","-=","*=","/=",",",";","{","}","#","_","'"}
;
 64
 65      this.KeyWordList.Clear();
 66      this.KeyWordList.TrimToSize();
 67      for(int i=1;i<key.Length;i++)
 68        this.KeyWordList.Add(key[i]);
 69
 70      this.LimitList.Clear();
 71      this.LimitList.TrimToSize();
 72      for(int i=1;i<limit.Length;i++)
 73        this.LimitList.Add(limit[i]);
 74
 75      this.ConstList.Clear();
 76      this.ConstList.TrimToSize();
 77
 78      this.IdentifierList.Clear();
 79      this.IdentifierList.TrimToSize();
 80
 81      this.OutputList.Clear();
 82      this.OutputList.TrimToSize();
 83    }

 84
 85    /*******************************************
 86    * 十进制转二进制函数
 87    *******************************************/

 88    private string dtb(string buf){
 89      int[] temp= new int[20];
 90      string binary = "";
 91      int val=0,i=0;
 92
 93      /*先将字符转化为十进制数*/
 94      try{
 95        val = Convert.ToInt32(buf);
 96      }
catch{
 97        val = 0;
 98      }

 99
100      if(val==0{
101        return(val.ToString());
102      }

103
104      i=0;
105      while(val!=0{
106        temp[i++]=val%2;
107        val/=2;
108      }

109
110      binary = "";
111      for(int j=0;j<=i-1;j++)
112        binary += (char)(temp[i-j-1]+48);
113
114      return(binary);
115    }

116
117    /*******************************************
118    * 根据不同命令查表或造表函数
119    *******************************************/

120    private int find(string buf,int type,int command){     
121      int number=0;
122      string temp;
123
124      IEnumerator ie = null;
125      ArrayList al = null;
126      switch(type){
127        case 1://关键字表
128          ie = this.KeyWordList.GetEnumerator();
129          break;
130        case 2://标识符表
131          ie = this.IdentifierList.GetEnumerator();
132          break;
133        case 3://常数表
134          ie = this.ConstList.GetEnumerator();
135          break;
136        case 4://运算、限界符表
137          ie = this.LimitList.GetEnumerator();
138          break;
139      }
            
140
141      if(ie!=null)
142      while (ie.MoveNext()){
143        temp = ie.Current.ToString();
144        if(temp.Trim().ToLower()==buf.Trim().ToLower()){
145          return number;
146        }

147        number ++;
148      }

149      
150      if(command==1){
151        /*找不到,当只需查表,返回0,否则还需造表*/
152        return 0;
153      }

154
155      switch(type){
156        case 1: al = this.KeyWordList;break;
157        case 2: al = this.IdentifierList;break;
158        case 3: al = this.ConstList;break;
159        case 4: al = this.LimitList;break;
160      }

161      if(al!=null)
162        al.Add(buf);
163
164      return number + 1;
165    }

166    /*******************************************
167    * 数字串处理函数
168    *******************************************/

169    private void cs_manage(string buffer){
170      string binary = dtb(buffer);
171      int result = find(binary,3,2);
172      this.OutputList.Add(String.Format("{0}\t\t\t3\t\t\t{1}",buffer,result));
173    }

174
175    /*******************************************
176    * 字符串处理函数 
177    *******************************************/

178    private void ch_manage(string buffer) {
179      int result = find(buffer,1,1);
180      if(result!=0){
181        this.OutputList.Add(String.Format("{0}\t\t\t1\t\t\t{1}",buffer,result));
182      }
else{
183        result = find(buffer,2,2);
184        this.OutputList.Add(String.Format("{0}\t\t\t2\t\t\t{1}",buffer,result));
185      }

186    }

187
188    /*******************************************
189    * 出错处理函数
190    *******************************************/

191    private void er_manage(char error,int lineno) {
192      this.OutputList.Add(String.Format("错误关键字: {0} ,所在行: {1}",error,lineno));
193    }

194
195    /*******************************************
196    * 转换Char数组为string
197    ******************************************/

198    private string joinString(char[] array,int Length){
199      string s = "";
200      if(array.Length>0)
201        for(int i=0;i<Length;i++){
202          if(array[i]!='\0'{
203            s+=array[i];
204          }
else{
205            break;
206          }

207        }

208      return s;
209    }

210
211    private char getchc(ref int n){
212      char[] c = sourceCode.ToCharArray();
213      if(n<c.Length){
214        char r = c[n];      
215        n++;
216        return r;
217      }

218      return sourceCode[sourceCode.Length-1];
219    }

220    /*******************************************
221    * 扫描程序
222    ********************************************/

223    public void Parse() {
224      //StreamWriter fpout = null;
225      char ch ;
226      int i=0,line=1;
227      int count,result,errorno=0;
228      char[] array = new char[30];
229      string word= "";
230
231      /*按字符依次扫描源程序,直至结束*/
232      int n = 0;
233
234      while(n<sourceCode.Length-1){
235        i = 0;
236        ch = getchc(ref n);
237        /*以字母开头*/
238        if(((ch>='A')&&(ch<='Z'))||((ch>='a')&&(ch<='z'))||(ch=='_')) {                    
239          while(((ch>='A')&&(ch<='Z'))||((ch>='a')&&(ch<='z'))||(ch=='_')||((ch>='0')&&(ch<='9'))) {
240            array[i++]=ch;
241            ch = getchc(ref n);
242          }

243          array[i++= '\0';
244          word = joinString(array,array.Length);
245          ch_manage(word);
246          if(n<sourceCode.Length)n--;
247        }
else if(ch>='0'&&ch<='9'{
248          /*以数字开头*/
249          while(ch>='0'&&ch<='9'{
250            array[i++]=ch;
251            ch = getchc(ref n);
252          }

253          array[i++= '\0';
254          word=joinString(array,array.Length);
255          cs_manage(word);
256          if(n<sourceCode.Length)n--;
257        }

258        else if((ch==' ')||(ch=='\t'))
259          /*消除空格符和水平制表符*/
260          ;
261        else if(ch=='\n')
262          /*消除回车并记录行数*/
263          line++;
264        else if(ch=='/'{
265          /*消除注释*/                          
266          ch = getchc(ref n);
267          if(ch=='='{
268            /*判断是否为‘/=’符号*/
269            this.OutputList.Add(String.Format("/=\t\t\t4\t\t\t32"));
270          }

271          else if(ch!='*'{
272            /*若为除号,写入输出*/
273            this.OutputList.Add(String.Format("/\t\t\t4\t\t\t13"));
274            n--;
275          }
 else if(ch=='*'{
276            /*若为注释的开始,消除包含在里面的所有字符*/
277            count=0;
278            ch = getchc(ref n);
279            while(count!=2{
280              /*当扫描到‘*’且紧接着下一个字符为‘/’才是注释的结束*/
281              count=0;
282              while(ch!='*')
283                ch = getchc(ref n);
284              count++;
285              ch = getchc(ref n);
286              if(ch=='/')
287                count++;
288              else
289                ch = getchc(ref n);
290            }

291          }

292        }

293        else if(ch=='"'{
294          /*消除包含在双引号中的字符串常量*/
295          this.OutputList.Add(String.Format("{0}\t\t\t4\t\t\t37",ch));
296          while(ch!='"')
297            ch = getchc(ref n);
298          this.OutputList.Add(String.Format("{0}\t\t\t4\t\t\t37",ch));
299        }

300        else {
301          /*首字符为其它字符,即运算限界符或非法字符*/
302          array[0]=ch;
303          /*再读入下一个字符,判断是否为双字符运算、限界符*/
304          ch = getchc(ref n);
305          /*若该字符非结束符*/
306          if(n<sourceCode.Length) {
307            array[1]=ch;
308            array[2= '\0';
309            word = joinString(array,2);
310            result=find(word,4,1); /*先检索是否为双字符运算、限界符*/
311            if(result==0{
312              /*若不是*/    
313              array[2= '\0';
314              word = joinString(array,1);
315              result=find(word,4,1);      
316              /*检索是否为单字符运算、限界符*/
317              if(result==0{
318                /*若还不是,则为非法字符*/
319                er_manage(array[0],line);
320                errorno++;
321                n--;
322              }

323              else {
324                /*若为单字符运算、限界符,写入输出并将扫描指针回退一个字符*/
325                this.OutputList.Add(String.Format("{0}\t\t\t4\t\t\t{1}\t",word,result));
326                n--;
327              }

328            }

329            else {
330              /*若为双字符运算、限界符,写输出*/
331              this.OutputList.Add(String.Format("{0}\t\t\t4\t\t\t{1}",word,result));
332            }

333          }

334          else {
335            /*若读入的下一个字符为结束符*/
336            array[2= '\0';
337            word = joinString(array,1);
338            /*只考虑是否为单字符运算、限界符*/
339            result=find(word,4,1);
340            /*若不是,转出错处理*/
341            if(result==0)
342              er_manage(array[0],line);
343            else {
344              /*若是,写输出*/
345              this.OutputList.Add(String.Format("{0}\t\t\t4\t\t\t{1}",word,result));
346            }

347          }

348        }

349        ch = getchc(ref n);
350      }

351      /*报告错误字符个数*/
352      this.OutputList.Add(String.Format("\n共有 {0} 个错误.\n",errorno));
353    }

354
355  }

356}

357

 

 

代码可能似曾相识,因为我参考了一篇C语言的代码。

这里下载工程源码(带C代码) 
2005年4月22日 S.F.

posted @ 2005-04-22 16:18  suifei  阅读(10338)  评论(9)  编辑  收藏  举报