《C++编程艺术》之对C++语法的扩展: 一个把扩展语法翻译成标准C++的小程序, 支持的扩展主要包括 cases ... to ...、repeat ... until(...)、foreach(... in ...)、typeof ... same as ...、breakon(...)、ignore(...) 和 elif 等.
当然这一切都是不严格的, 只是一个demo而已: 程序把扩展后的C++代码翻译成真正的C++代码, 没有做太多错误检查.
后来想了几个可扩展的点, 不过基于这段代码来实现的话会麻烦些, 比如用if else 模拟一个支持string类的switch, 对于switch的case里面的代码块用{}来wrap一下等,感觉使用递归下降的方法会很好搞~
下面是书里的代码:
// A translator for experimental C++ extensions.
//
// Reads a source file written in the extended dialect and writes plain C++.
// The extended keywords recognised by the main loop are foreach, cases,
// repeat, until, typeof, breakon, ignore and elif; every other token,
// including the contents of comments and string/character literals, is
// copied to the output unchanged.
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>

using namespace std;

// Prototypes for the functions that handle
// the extended keywords.
void foreach();
void cases();
void repeat();
void until();
void typeof();
void breakon();

void ignore();
void elif();

// Prototypes for tokenizing the input file.
bool gettoken(string &tok);
void skipspaces();

// Indentation padding string: the leading whitespace of the input line
// currently being read (maintained by gettoken).
string indent = "";

// The input and output file streams.
ifstream fin;
ofstream fout;

// Exception class for syntax errors.
class SyntaxExc {
  string what;
public:
  // Taking a const string& lets callers pass string literals without the
  // literal-to-char* conversion that modern C++ rejects; char* arguments
  // still convert implicitly.
  SyntaxExc(const string &e) : what(e) {}

  // Returns the message supplied at construction.
  string geterror() const { return what; }
};

// Copies `token` and every following token to the output, up to and
// including the first token equal to `closer`.
// Throws SyntaxExc if the input ends before `closer` is seen.
static void copydelimited(string &token, const char *closer) {
  do {
    fout << token;
    if(!gettoken(token))
      throw SyntaxExc("Unexpected EOF encountered.");
  } while(token != closer);
  fout << token;
}

// Entry point: ep <input file> <output file>.
// Returns 0 on success and 1 on a usage error, a file that cannot be
// opened, or a syntax error in the input.
int main(int argc, char *argv[]) {
  string token;

  if(argc != 3) {
    cout << "Usage: ep <input file> <output file>\n";
    return 1;
  }

  fin.open(argv[1]);

  if(!fin) {
    cout << "Cannot open " << argv[1] << endl;
    return 1;
  }

  fout.open(argv[2]);

  if(!fout) {
    cout << "Cannot open " << argv[2] << endl;
    return 1;
  }

  // Write header.
  fout << "// Translated from an .exp source file.\n";

  try {
    // Main translation loop.
    // Comments and literals are handled before the keywords because a
    // keyword such as foreach may appear inside them, where translating
    // it would be wrong.
    while(gettoken(token)) {

      // Copy a // comment through the whitespace token that holds the
      // newline. End of input also ends the comment.
      if(token == "//") {
        do {
          fout << token;
          if(!gettoken(token)) break;   // token is empty after EOF
        } while(token.find('\n') == string::npos);
        fout << token;
      }

      // Copy a /* comment.
      else if(token == "/*") copydelimited(token, "*/");

      // Copy a quoted string. An escaped quote arrives as the single
      // two-character token \" and therefore does not end the string.
      else if(token == "\"") copydelimited(token, "\"");

      // Copy a character literal.
      else if(token == "\'") copydelimited(token, "\'");

      else if(token == "foreach") foreach();

      else if(token == "cases") cases();

      else if(token == "repeat") repeat();

      else if(token == "until") until();

      else if(token == "typeof") typeof();

      else if(token == "breakon") breakon();

      // Qualified so the call cannot become ambiguous with std::ignore
      // when <tuple> is pulled in under the using-directive above.
      else if(token == "ignore") ::ignore();

      else if(token == "elif") elif();

      else fout << token;
    }
  } catch(const SyntaxExc &exc) {
    cout << exc.geterror() << endl;
    return 1;
  }

  return 0;
}

// True for characters that may continue an identifier.
static bool isidentchar(int c) {
  return isalpha(c) || isdigit(c) || c == '_';
}

// True for characters that may continue a number. Signs are accepted
// anywhere, so "1+2" is read as a single token.
static bool isnumberchar(int c) {
  return isdigit(c) || c == '.' || tolower(c) == 'e' ||
         c == '-' || c == '+';
}

// Appends input characters to tok for as long as the next unread
// character satisfies accept. The first rejected character stays unread.
// peek() yields EOF at end of input, which every predicate rejects.
static void takewhile(string &tok, bool (*accept)(int)) {
  while(accept(fin.peek()))
    tok += static_cast<char>(fin.get());
}

// Get the next token from the input stream.
//
// tok receives the token text: a run of whitespace, an identifier, a
// number, a backslash plus the character after it, "=" or "==", "/",
// "//", "/*", "*", "*/", or any other single character.
// Returns false (with tok empty) once the input is exhausted.
// Side effect: keeps the global `indent` equal to the leading whitespace
// of the line being read.
bool gettoken(string &tok) {
  static bool trackIndent = true;

  tok = "";

  // Characters are held as int so that bytes above 127 reach the
  // <cctype> functions as valid (non-negative) arguments.
  int ch = fin.get();

  // Check for EOF and return false if EOF
  // is found.
  if(!fin) return false;

  // Read whitespace.
  if(isspace(ch)) {
    for(;;) {
      tok += static_cast<char>(ch);

      // Reset indent counter with each new line.
      if(ch == '\n') {
        indent = "";
        trackIndent = true;
      }
      else if(trackIndent) indent += static_cast<char>(ch);

      // The first non-whitespace character is left unread because it
      // begins the next token.
      if(!isspace(fin.peek())) break;
      ch = fin.get();
    }
    return true;
  }

  // Stop tracking indentation after encountering
  // first non-whitespace character on a line.
  trackIndent = false;

  // Read an identifier or keyword.
  if(isalpha(ch) || ch == '_') {
    tok += static_cast<char>(ch);
    takewhile(tok, isidentchar);
    return true;
  }

  // Read a number.
  if(isdigit(ch)) {
    tok += static_cast<char>(ch);
    takewhile(tok, isnumberchar);
    return true;
  }

  // Read an escape such as \" as one token.
  if(ch == '\\') {
    tok += '\\';
    const int escaped = fin.get();
    if(fin) tok += static_cast<char>(escaped);   // lone backslash at EOF
    return true;
  }

  // Read = or ==.
  if(ch == '=') {
    tok += '=';
    if(fin.peek() == '=') tok += static_cast<char>(fin.get());
    return true;
  }

  // Check for begin comment symbols.
  if(ch == '/') {
    tok += '/';
    const int next = fin.peek();
    if(next == '/' || next == '*') tok += static_cast<char>(fin.get());
    return true;
  }

  // Check for end comment symbols.
  if(ch == '*') {
    tok += '*';
    if(fin.peek() == '/') tok += static_cast<char>(fin.get());
    return true;
  }

  // Anything else is a one-character token.
  tok += static_cast<char>(ch);

  return true;
}

// Translate a foreach loop.
//
// Input form (after the keyword):  (Type var in array)
// Output:
//   int _foreach_idx_N = 0;
//   for(Type var = array[0];
//    _foreach_idx_N < ((sizeof array)/(sizeof array[0])) && ((void)(var = array[_foreach_idx_N]), true);
//    _foreach_idx_N++)
// N is a per-run counter, so each foreach gets its own index variable.
// The element is fetched inside the loop condition, after the bounds
// test, so the generated code never reads array[size].
// Type and array must each be a single token.
// Throws SyntaxExc when "(", "in" or ")" is missing.
void foreach() {
  static int counter = 0;
  string token;

  // Create loop control variable for generated
  // for loop.
  ostringstream namebuilder;
  namebuilder << "_foreach_idx_" << counter++;
  const string forvarname = namebuilder.str();

  fout << "int " << forvarname
       << " = 0;\n";

  // Write beginning of generated for loop.
  fout << indent << "for(";

  skipspaces();

  // Read the (
  gettoken(token);
  if(token != "(")
    throw SyntaxExc("( expected in foreach.");

  skipspaces();

  // Get the type of the foreach variable.
  gettoken(token);
  fout << token << " ";

  skipspaces();

  // Read and save the foreach variable's name.
  gettoken(token);
  const string varname = token;

  skipspaces();

  // Read the "in"
  gettoken(token);
  if(token != "in")
    throw SyntaxExc("in expected in foreach.");

  skipspaces();

  // Read the array name.
  gettoken(token);
  const string arrayname = token;

  fout << varname << " = " << arrayname << "[0];\n";

  // Loop condition: bounds test first, then load the current element.
  // The (void) cast keeps the built-in comma operator in play whatever
  // the element type is.
  fout << (indent + " ") << forvarname << " < "
       << "((sizeof " << arrayname << ")/"
       << "(sizeof " << arrayname << "[0]))"
       << " && ((void)(" << varname << " = " << arrayname << "["
       << forvarname << "]), true);\n";

  fout << (indent + " ") << forvarname << "++)";

  skipspaces();

  // Read the )
  gettoken(token);
  if(token != ")")
    throw SyntaxExc(") expected in foreach.");
}

// Reads one bound of a cases range: either an integer constant or a
// single-character constant such as 'a'. Returns its integer value.
// Throws SyntaxExc for anything else.
static int readcaseconstant() {
  string token;
  gettoken(token);

  // is an int constant
  if(!token.empty() && isdigit(static_cast<unsigned char>(token[0])))
    return atoi(token.c_str());

  // is char constant
  if(token == "'") {
    gettoken(token);
    if(token.size() != 1) throw SyntaxExc("invalid char constant~");

    const int value = static_cast<int>(token[0]);

    // discard closing '
    gettoken(token);
    if(token != "'")
      throw SyntaxExc("' expected in cases.");

    return value;
  }

  throw SyntaxExc("Constant expected in cases.");
}

// Translate a cases statement.
//
// Input form (after the keyword):  A to B:
// Output: one "case N:" label for every N in A..B, one per line, with
// the labels after the first indented like the current input line.
// Throws SyntaxExc on malformed input or when B is smaller than A.
void cases() {
  string token;

  skipspaces();

  // Get starting value.
  const int first = readcaseconstant();

  skipspaces();

  // Read and discard the "to".
  gettoken(token);
  if(token != "to")
    throw SyntaxExc("to expected in cases.");

  skipspaces();

  // Get ending value.
  const int last = readcaseconstant();

  skipspaces();

  // Read and discard the :
  gettoken(token);

  if(token != ":")
    throw SyntaxExc(": expected in cases.");

  // A reversed range would otherwise be translated silently into a
  // single label for the starting value.
  if(last < first)
    throw SyntaxExc("Invalid range in cases.");

  // Generate stacked case statements.
  fout << "case " << first << ":\n";
  for(int i = first + 1; i <= last; i++) {
    fout << indent << "case " << i << ":";
    if(i != last) fout << '\n';
  }
}

// Translate a repeat loop: repeat becomes do.
void repeat() {
  fout << "do";
}

// Translate an until.
//
// until(cond) becomes while(!(cond)): the condition is copied verbatim
// through its balancing ")" and wrapped in a negation.
// Throws SyntaxExc when "(" is missing or the input ends first.
void until() {
  string token;
  int parencount = 1;

  fout << "while";

  skipspaces();

  // Read and store the (
  gettoken(token);
  if(token != "(")
    throw SyntaxExc("( expected in until.");
  fout << "(";

  // Begin while by reversing and
  // parenthesizing the condition.
  fout << "!(";

  // Now, read the expression.
  do {
    if(!gettoken(token))
      throw SyntaxExc("Unexpected EOF encountered.");

    if(token == "(") parencount++;
    if(token == ")") parencount--;

    fout << token;
  } while(parencount > 0);
  fout << ")";
}

// Translate a typeof expression.
442 void typeof() { 443 string token; 444 string temp; 445 446 fout << "typeid("; 447 448 skipspaces(); 449 450 gettoken(token); 451 452 do { 453 temp = token; 454 455 if(!gettoken(token)) 456 throw SyntaxExc("Unexpected EOF encountered."); 457 458 if(token != "same") fout << temp; 459 } while(token != "same"); 460 461 skipspaces(); 462 463 gettoken(token); 464 465 if(token != "as") throw SyntaxExc("as expected."); 466 467 fout << ") == typeid("; 468 469 skipspaces(); 470 471 do { 472 if(!gettoken(token)) 473 throw SyntaxExc("Unexpected EOF encountered."); 474 475 fout << token; 476 } while(token != ")"); 477 fout << ")"; 478 } 479 480 void skipspaces() { 481 char ch; 482 483 do { 484 ch = fin.get(); 485 } while(isspace(ch)); 486 fin.putback(ch); 487 } 488 489 490 void breakon() 491 { 492 fout << "if"; 493 string token; 494 skipspaces(); 495 gettoken(token); 496 497 if(token != "(") 498 throw SyntaxExc("left bracket is required!"); 499 500 fout <<"("; 501 skipspaces(); 502 503 int bracketCount = 1; 504 505 do 506 { 507 gettoken(token); 508 if(token == "(") bracketCount++; 509 else if(token == ")") bracketCount--; 510 fout << token; 511 }while(bracketCount > 0); 512 513 fout << "break"; 514 515 516 } 517 518 void ignore() 519 { 520 fout << "if"; 521 skipspaces(); 522 523 string token; 524 gettoken(token); 525 if(token != "(") 526 throw SyntaxExc("( is required here\n"); 527 528 fout << "("; 529 int bracketCount = 1; 530 531 do 532 { 533 gettoken(token); 534 if(token == "(") bracketCount++; 535 else if(token == ")") bracketCount--; 536 fout << token; 537 } while (bracketCount > 0); 538 539 fout << "continue"; 540 541 542 543 } 544 545 void elif() 546 { 547 fout << "else if"; 548 }