(基于Java)编写编译器和解释器-第5A章:基于Antlr解析表达式和赋值语句及计算(连载)
Posted on 2012-07-22 19:25 Bang 阅读(1144) 评论(1) 收藏 举报
本章在第3A章源代码基础上继续完善基于Antlr自动化的解析器,解释执行第5章解析的复合语句,赋值语句和表达式等相关中间码。并仿照第5章的简化标准,将一些东西简化掉,尽量能让你从最简处入手,掌握Antlr自动化构建解析器的第一步。
==>> 本章中文版源代码下载:svn co http://wci.googlecode.com/svn/branches/ch5_antlr/ 源代码使用了UTF-8编码,下载到本地请修改!
好的工具事半功倍,Antlr亦如此。antlr.org上有一个很有特色的工具antlrworks。如果使用Eclipse,可以安装插件antlrv3ide。两个工具的主要特色是可视化地创建EBNF语法,就如同你在前面章节看到的语法图一样。对于我来说,比较习惯antlrworks,它有良好的调试功能和DFA分析功能。
1 带AST构造的语法
// Parser rules with AST construction (ANTLR 3, output=AST).
// "^" makes a token the subtree root, "!" drops it from the tree,
// and "->" rewrites the matched alternative into the given tree.

// A program is one compound statement followed by the terminating dot.
program:
    compound_statement DOT!;

// BEGIN ... END becomes a COMPOUND node whose children are the statements.
compound_statement:
    BEGIN statement_list END -> ^(COMPOUND statement_list);

// "ID := expression" becomes ^(ASSIGN ID expression).
assignment_statement:
    ID ASSIGN expression -> ^(ASSIGN ID expression);

statement:
    compound_statement | assignment_statement;

// Semicolons are separators only; they are not kept in the tree.
statement_list:
    statement (SEMI statement)* SEMI? -> statement+;

expression:
    simple_expression (rel_ops^ simple_expression)?;

rel_ops:
    LT | LE | GT | GE | NOT_EQUAL;

simple_expression:
    signedterm (add_ops^ term)*;

// A leading MINUS wraps the term in a NEGATE node; a leading PLUS is dropped.
signedterm:
    (a=PLUS | a=MINUS)? term
        -> {a!=null && a.getType()==MINUS}? ^(NEGATE term)
        -> term;

add_ops:
    PLUS | MINUS | OR;

term:
    factor (mul_ops^ factor)*;

mul_ops:
    STAR | SLASH | DIV | MOD | AND;

// NUMBER_REAL and NESTEXPR are matched by the PascalVisitor tree grammar's
// factor rule, so they have to be produced here as well: a parenthesised
// expression is kept as ^(NESTEXPR expression) instead of being flattened.
// NOTE(review): NESTEXPR is presumably declared as an imaginary token next to
// COMPOUND and NEGATE in the grammar's tokens section - confirm.
factor:
      ID
    | NUMBER
    | NUMBER_REAL
    | STRING
    | NOT^ factor
    | LPAREN expression RPAREN -> ^(NESTEXPR expression);
目前的程序program由一个复合语句+结束的点"."组成。这个antlr语句基本与第五章的语法图5-1 和图5-2 类似。
2 计算Token值
填充第三章中用于计算常量token值的类ValueComputer,目前只对整数,字符串,浮点数token算值,算值逻辑与PascalStringToken和PascalNumberToken基本类似。
详细参见代码ValueComputer,这里不再显示。
3 引入符号表
这里复用第5章的符号表。需要使用符号表的地方有两个,一个是赋值语句的左边变量作为定义出现,另外一个是表达式中的标识符作为引用出现。在Antlr分析中,符号表操作一般必须要在语法树构建完成后才能进行,因为语法树构建过程中的节点是不清晰的。在Antlr中构建完AST之后,将使用Antlr树语法(Tree Grammar)去遍历语法树,这个过程中我们可以加入符号表操作,也可以执行动作和生成代码。
4 执行赋值语句及计算语句
我原本只想演示一下相关分析树,但是发现过于简单,于是就干脆执行算了,执行第5章的分析树是第6章内容,所以没有6A章了。
详细语法如下表:
// Tree grammar that walks the AST built by the Pascal parser and interprets it:
// assignments store their value in the symbol table, expressions are evaluated
// bottom-up and returned through each rule's "value" return slot.
tree grammar PascalVisitor;

options {
    tokenVocab = Pascal;
    ASTLabelType = PascalNode;
}

@header {
package com.lifesting.book.wci;

import wci.intermediate.*;
import wci.intermediate.symtabimpl.SymTabKeyImpl;
}

@members {
    protected SymTabStack symtabStack = SymTabFactory.createSymTabStack();

    public SymTabStack getSymbolTableStack() {
        return this.symtabStack;
    }
}

program:
    compound;

compound:
    ^(COMPOUND stmt+);

stmt:
    compound | assign;

// Assignment: look the (lower-cased) variable up, enter it on first use,
// then store the evaluated right-hand side as its DATA_VALUE attribute.
assign:
    ^(ASSIGN i=ID e=expr) {
        String var = $i.text.toLowerCase();
        SymTabEntry id_entry = symtabStack.lookup(var);
        if (id_entry == null) {
            id_entry = symtabStack.enterLocal(var);
        }
        id_entry.setAttribute(SymTabKeyImpl.DATA_VALUE, e);
    };

// Relational operators are only evaluated for two numeric operands.
// The int codes in the switch are the ones assigned by rel_ops below.
expr returns [Object value]:
      s=simple { value = s; }
    | ^(r=rel_ops e1=expr e2=expr) {
        if (e1 instanceof Number && e2 instanceof Number) {
            double de1 = ((Number) e1).doubleValue();
            double de2 = ((Number) e2).doubleValue();
            switch (r) {
            case 1:
                value = de1 < de2;
                break;
            case 2:
                value = de1 <= de2;
                break;
            case 3:
                value = de1 > de2;
                break;
            case 4:
                value = de1 >= de2;
                break;
            case 5:
                value = de1 != de2;
                break;
            default:
                break;
            }
        } else {
            System.err.println("无法执行比较:"+e1+"["+r+"]"+e2);
        }
    };

// Additive level. The first tree alternative handles a negated left operand,
// the second the general case (numeric +/- or boolean OR).
// The int codes in the switches are the ones assigned by add_ops below.
simple returns [Object value]:
      s=term { value = s; }
    | ^(o=add_ops f1=negterm f2=simple) {
        double df1 = Double.parseDouble(f1.toString());
        if (f2 instanceof Number) {
            double df2 = Double.parseDouble(f2.toString());
            switch (o) {
            case 1:
                value = df1 + df2;
                break;
            case 2:
                value = df1 - df2;
                break;
            }
        } else {
            System.err.println("不是一个数值:"+f2);
        }
    }
    | ^(o=add_ops f1=simple f2=simple) {
        if (f1 instanceof Number && f2 instanceof Number) {
            double df1 = Double.parseDouble(f1.toString());
            double df2 = Double.parseDouble(f2.toString());
            switch (o) {
            case 1:
                value = df1 + df2;
                break;
            case 2:
                value = df1 - df2;
                break;
            default:
                break;
            }
        } else if (f1 instanceof Boolean && f2 instanceof Boolean) {
            value = ((Boolean) f1).booleanValue() || ((Boolean) f2).booleanValue();
        } else {
            System.err.println("不能执行simple运算,f1="+f1+",f2="+f2);
        }
    };

// Unary minus. The result is assigned to "value" (instead of returning from
// inside the action) so both branches leave the rule the same way.
negterm returns [Object value]:
    ^(NEGATE n=term) {
        if (n instanceof Number) {
            value = -Double.parseDouble(n.toString());
        } else {
            System.err.println("不是一个数值:"+n);
            value = 0.0;
        }
    };

// Multiplicative level. The left operand is a term rather than a factor so
// that chained operators such as a*b*c, which the parser nests to the left,
// can be walked as well.
// The int codes in the switch are the ones assigned by mul_ops below.
term returns [Object value]:
      f0=factor { value = f0; }
    | ^(t=mul_ops f1=term f2=factor) {
        if (f1 instanceof Number && f2 instanceof Number) {
            double df1 = Double.parseDouble(f1.toString());
            double df2 = Double.parseDouble(f2.toString());
            switch (t) {
            case 1:
                value = df1 * df2;
                break;
            case 2:
            case 3:
                value = df1 / df2;
                // This break was missing: every division fell through into
                // the MOD case below and was overwritten by the remainder.
                break;
            case 4:
                // "\%" is the escaped percent sign inside an ANTLR 3 action.
                value = df1 \% df2;
                break;
            default:
                break;
            }
        } else if (f1 instanceof Boolean && f2 instanceof Boolean) {
            value = ((Boolean) f1).booleanValue() && ((Boolean) f2).booleanValue();
        } else {
            System.err.println("不能执行term运算,f1="+f1+",f2="+f2);
        }
    };

// Leaves of the expression tree: variables are read from the symbol table,
// literals take the value stored on their token.
factor returns [Object value]:
      i=ID {
        String var = $i.text.toLowerCase();
        SymTabEntry id_entry = symtabStack.lookup(var);
        if (id_entry == null) {
            System.err.println("使用不存在的变量:"+var);
        } else {
            value = id_entry.getAttribute(SymTabKeyImpl.DATA_VALUE);
        }
    }
    | n=NUMBER { value = ((PascalAntlrToken) $n.getToken()).getValue(); }
    | r=NUMBER_REAL { value = ((PascalAntlrToken) $r.getToken()).getValue(); }
    | s=STRING { value = ((PascalAntlrToken) $s.getToken()).getValue(); }
    | ^(NOT f=factor) {
        if (f instanceof Boolean) {
            value = !((Boolean) f).booleanValue();
        } else {
            System.err.println("不是一个布尔值:"+f);
        }
    }
    | ^(NESTEXPR e=expr) {
        value = e;
    };

// Operator rules map each operator token to a small int code that the
// switch statements in expr, simple and term dispatch on.
rel_ops returns [int type]:
    LT { type = 1; } | LE { type = 2; } | GT { type = 3; } | GE { type = 4; } | NOT_EQUAL { type = 5; };

add_ops returns [int type]:
    PLUS { type = 1; } | MINUS { type = 2; } | OR { type = 3; };

mul_ops returns [int type]:
    STAR { type = 1; } | SLASH { type = 2; } | DIV { type = 3; } | MOD { type = 4; } | AND { type = 5; };
测试程序:
1: public final class SimpleInterpreter {
2: public static void main(String[] args) throws IOException, RecognitionException {
3: //第5章示例Pascal
4: InputStreamReader stream = new InputStreamReader(ShowToken.class.getResourceAsStream("/assignments.txt"));
5: ANTLRReaderStream reader = new ANTLRReaderStream(stream);
6: //词法分析器
7: PascalLexer lexer = new PascalLexer(reader);
8: CommonTokenStream token_stream = new CommonTokenStream(lexer);
9: //语法分析器并带自己的TreeAdaptor,转换成自己的PascalNode
10: PascalParser parser = new PascalParser(token_stream);
11: parser.setTreeAdaptor(new PascalNodeAdaptor());
12: program_return prog = parser.program();13: //遍历树并运算
14: TreeNodeStream node_stream = new CommonTreeNodeStream(prog.getTree());
15: PascalVisitor interpreter = new PascalVisitor(node_stream);
16: interpreter.program(); 17: SymTabStack stack = interpreter.getSymbolTableStack();18: SymTabEntry five_entry = stack.lookup("five");
19: System.out.println("Five = "+five_entry.getAttribute(SymTabKeyImpl.DATA_VALUE));
20: SymTabEntry str_entry = stack.lookup("str");
21: System.out.println("str = "+str_entry.getAttribute(SymTabKeyImpl.DATA_VALUE));
22: SymTabEntry fahrenheit_entry = stack.lookup("fahrenheit");
23: System.out.println("fahrenheit = "+fahrenheit_entry.getAttribute(SymTabKeyImpl.DATA_VALUE));
24: SymTabEntry centigrade_entry = stack.lookup("centigrade");
25: System.out.println("centigrade = "+centigrade_entry.getAttribute(SymTabKeyImpl.DATA_VALUE));
26: } 27: }最后的输出结果:
Five = 5.0
str = 'hello, world'
fahrenheit = 32.0
centigrade = 25
浙公网安备 33010602011771号