自制编译器 青木峰郎 笔记 Ch6 语法分析

6.1 定义的分析

表示程序整体

// Start production: a whole source file is the import statements, followed
// by the top-level definitions, followed by end of input.
compilation_unit(): {}
{
    import_stmts()
    top_defs()
    <EOF>
}

import

// Zero or more import statements.
import_stmts(): {}
{
    ( import_stmt() )*
}
// One import statement: the IMPORT keyword, a dot-separated sequence of
// names, and a terminating semicolon.
import_stmt(): {}
{
    <IMPORT>
    name() ( "." name() )*
    ";"
}
// A single identifier. Kept as its own production so that package names can
// be given special handling.
// The ": {}" (empty declaration section) is required for this to be a
// well-formed production, matching the sibling productions in this listing.
name(): {}
{
      <IDENTIFIER>
}

top_defs

// Parses zero or more top-level definitions and collects them into a single
// Declarations object, which is returned once no further alternative matches.
// Each alternative parses one kind of definition and registers it with the
// matching Declarations.add* method.
Declarations top_defs():
{
    Declarations decls = new Declarations();
    DefinedFunction defun;
    List<DefinedVariable> defvars;
    Constant defconst;
    StructNode defstruct;
    UnionNode defunion;
    TypedefNode typedef;
}
{
    // Function and variable definitions share the storage()/type prefix, so a
    // syntactic lookahead up to the "(" after the identifier selects defun().
    ( LOOKAHEAD(storage() typeref() <IDENTIFIER> "(")
      defun=defun()         { decls.addDefun(defun); }
    // NOTE(review): the 3-token lookahead presumably separates a variable of
    // struct/union type from a struct/union definition -- confirm.
    | LOOKAHEAD(3)
      defvars=defvars()     { decls.addDefvars(defvars); }
    | defconst=defconst()   { decls.addConstant(defconst); }
    | defstruct=defstruct() { decls.addDefstruct(defstruct); }
    | defunion=defunion()   { decls.addDefunion(defunion); }
    | typedef=typedef()     { decls.addTypedef(typedef); }
    )*
        {
            return decls;
        }
}
// #@@}

defvars

// #@@range/defvars{
// Parses one variable definition statement. A single statement may declare
// several variables that share one storage class and one type:
//     storage type name ["=" expr] ("," name ["=" expr])* ";"
// Returns one DefinedVariable per declared name, in source order.
List<DefinedVariable> defvars():
{
    List<DefinedVariable> result = new ArrayList<DefinedVariable>();
    boolean isPrivate;
    TypeNode varType;
    String varName;
    ExprNode initializer = null;
}
{
    isPrivate=storage() varType=type()
    varName=name() [ "=" initializer=expr() ]
        {
            result.add(new DefinedVariable(isPrivate, varType, varName, initializer));
            // Reset so the next declarator does not inherit this initializer.
            initializer = null;
        }
    (
        "," varName=name() [ "=" initializer=expr() ]
            {
                // Every further declarator yields a new variable.
                result.add(new DefinedVariable(isPrivate, varType, varName, initializer));
                initializer = null;
            }
    )*
    ";"
        {
            return result;
        }
}
// #@@}

defconst

// #@@range/defconst{
// Parses a constant definition:
//     <CONST> type name "=" expr ";"
// and returns the corresponding Constant node.
Constant defconst():
{
    TypeNode constType;
    String constName;
    ExprNode constValue;
}
{
    <CONST>
    constType=type()
    constName=name()
    "=" constValue=expr()
    ";"
        {
            return new Constant(constType, constName, constValue);
        }
}
// #@@}

函数定义defun

// #@@range/defun{
// Parses a function definition:
//     storage typeref name "(" params ")" block
// The function's type is a FunctionTypeRef built from the return type and
// the parameter type refs, wrapped in a TypeNode.
DefinedFunction defun():
{
    boolean isPrivate;
    TypeRef returnType;
    String funcName;
    Params parameters;
    BlockNode funcBody;
}
{
    isPrivate=storage()
    returnType=typeref()
    funcName=name()
    "(" parameters=params() ")"
    funcBody=block()
        {
            TypeRef funcType =
                new FunctionTypeRef(returnType, parameters.parametersTypeRef());
            return new DefinedFunction(isPrivate, new TypeNode(funcType),
                                       funcName, parameters, funcBody);
        }
}
// #@@}

// #@@range/storage{
// Parses the optional storage specifier.
// Returns true when a <STATIC> token was present, false otherwise.
boolean storage():
{
    Token staticTok = null;
}
{
    [ staticTok=<STATIC> ]
        {
            return staticTok != null;
        }
}
// #@@}

// #@@range/params{
// Parses a function's parameter list (the part between the parentheses).
// Either the single keyword <VOID>, giving an empty parameter list, or a
// list of fixed parameters optionally followed by ", ..." for varargs.
Params params():
{
    Token voidTok;
    Params ps;
}
{
      // Only treat <VOID> as "no parameters" when ")" follows directly; this
      // excludes parameter declarations such as "void *".
      LOOKAHEAD(<VOID> ")")
      voidTok=<VOID>
        {
            return new Params(location(voidTok), new ArrayList<CBCParameter>());
        }
    | ps=fixedparams()
      [ "," "..."
            { ps.acceptVarargs(); }
      ]
        {
            return ps;
        }
}
// #@@}

// #@@range/fixedparams{
// Parses one or more comma-separated parameters.
// The location of the resulting Params is that of the first parameter.
Params fixedparams():
{
    List<CBCParameter> list = new ArrayList<CBCParameter>();
    CBCParameter next, first;
}
{
    first=param()
        { list.add(first); }
    (
        // Two tokens of lookahead: continue only when "," is followed by the
        // start of another parameter.
        LOOKAHEAD(2)
        "," next=param()
            { list.add(next); }
    )*
        {
            return new Params(first.location(), list);
        }
}
// #@@}

// #@@range/param{
// Parses a single parameter: a type followed by a name.
CBCParameter param():
{
    TypeNode paramType;
    String paramName;
}
{
    paramType=type()
    paramName=name()
        {
            return new CBCParameter(paramType, paramName);
        }
}
// #@@}

// #@@range/block{
// Parses a brace-delimited block: local variable definitions followed by
// statements. The block's location is that of the opening "{".
BlockNode block():
{
    Token open;
    List<DefinedVariable> locals;
    List<StmtNode> body;
}
{
    open="{"
    locals=defvar_list()
    body=stmts()
    "}"
        {
            return new BlockNode(location(open), locals, body);
        }
}
// #@@}

defstruct/defunion

// #@@range/defstruct{
// Parses a struct definition:
//     <STRUCT> name member_list ";"
// The node is located at the <STRUCT> keyword and carries a StructTypeRef
// built from the struct's name.
StructNode defstruct():
{
    Token kw;
    String tag;
    List<Slot> members;
}
{
    kw=<STRUCT>
    tag=name()
    members=member_list()
    ";"
        {
            return new StructNode(location(kw), new StructTypeRef(tag), tag, members);
        }
}
// #@@}

// Parses a union definition:
//     <UNION> name member_list ";"
// The node is located at the <UNION> keyword and carries a UnionTypeRef
// built from the union's name.
UnionNode defunion():
{
    Token kw;
    String tag;
    List<Slot> members;
}
{
    kw=<UNION>
    tag=name()
    members=member_list()
    ";"
        {
            return new UnionNode(location(kw), new UnionTypeRef(tag), tag, members);
        }
}

// #@@range/member_list{
// Parses the brace-delimited member list of a struct or union: zero or more
// slots, each terminated by ";". Returns the slots in source order.
List<Slot> member_list():
{
    List<Slot> slots = new ArrayList<Slot>();
    Slot member;
}
{
    "{"
    (
        member=slot() ";"
            { slots.add(member); }
    )*
    "}"
        {
            return slots;
        }
}
// #@@}

// #@@range/slot{
// Parses one struct/union member: a type followed by a name.
Slot slot():
{
    TypeNode slotType;
    String slotName;
}
{
    slotType=type()
    slotName=name()
        {
            return new Slot(slotType, slotName);
        }
}
// #@@}

typeref

// #@@range/type{
// Parses a type reference and wraps it in a TypeNode.
TypeNode type():
{
    TypeRef parsed;
}
{
    parsed=typeref()
        {
            return new TypeNode(parsed);
        }
}
// #@@}

// #@@range/typeref{
// Parses a type reference: a base type followed by zero or more suffixes.
// Each suffix wraps the type built so far, so suffixes apply left to right:
//     "[" "]"            array of unspecified length
//     "[" <INTEGER> "]"  array of fixed length
//     "*"                pointer
//     "(" ... ")"        function taking the given parameter types
TypeRef typeref():
{
    TypeRef ref;
    Token t;
    ParamTypeRefs params;
}
{
    ref=typeref_base()
    // Both array forms begin with "[", so two tokens of lookahead are needed
    // to tell "[" "]" from "[" <INTEGER> "]".
    ( LOOKAHEAD(2)
      "[" "]"
        {
            // array of unspecified length
            ref = new ArrayTypeRef(ref);
        }
    | "[" t=<INTEGER> "]"
        {
            // fixed-length array
            ref = new ArrayTypeRef(ref, integerValue(t.image));
        }
    | "*"
        // pointer
        {
            ref = new PointerTypeRef(ref);
        }
    | "(" params=param_typerefs() ")"
        {
            ref = new FunctionTypeRef(ref, params);
        }
    )*
        {
            return ref;
        }
}
// #@@}

// #@@range/param_typerefs{
// Parses the parameter types of a function type.
// Either the single keyword <VOID>, giving an empty list, or a list of type
// references optionally followed by ", ..." for varargs.
ParamTypeRefs param_typerefs():
{
    ParamTypeRefs refs;
}
{
      // <VOID> means "no parameters" only when ")" follows immediately.
      LOOKAHEAD(<VOID> ")")
      <VOID>
        {
            return new ParamTypeRefs(new ArrayList<TypeRef>());
        }
    | refs=fixedparam_typerefs()
      [ "," "..."
            { refs.acceptVarargs(); }
      ]
        {
            return refs;
        }
}
// #@@}

// #@@range/fixedparam_typerefs{
// Parses one or more comma-separated type references and returns them, in
// source order, as a ParamTypeRefs.
ParamTypeRefs fixedparam_typerefs():
{
    List<TypeRef> collected = new ArrayList<TypeRef>();
    TypeRef current;
}
{
    current=typeref()
        { collected.add(current); }
    (
        // Two tokens of lookahead: continue only when "," is followed by the
        // start of another type reference.
        LOOKAHEAD(2)
        "," current=typeref()
            { collected.add(current); }
    )*
        {
            return new ParamTypeRefs(collected);
        }
}
// #@@}

// #@@range/typeref_base{
// Parses a base type: every type other than the pointer and array forms,
// whose suffixes are parsed by typeref().
// Alternatives: void, the signed and unsigned integer types, "struct NAME",
// "union NAME", and a user-defined type name.
TypeRef typeref_base():
{
    Token t, name;
}
{
      t=<VOID>          { return new VoidTypeRef(location(t)); }
    | t=<CHAR>          { return IntegerTypeRef.charRef(location(t)); }
    | t=<SHORT>         { return IntegerTypeRef.shortRef(location(t)); }
    | t=<INT>           { return IntegerTypeRef.intRef(location(t)); }
    | t=<LONG>          { return IntegerTypeRef.longRef(location(t)); }
    // All unsigned alternatives start with <UNSIGNED>; two tokens of
    // lookahead pick the right one. The last one needs none, being the only
    // remaining choice.
    | LOOKAHEAD(2) t=<UNSIGNED> <CHAR>
        { return IntegerTypeRef.ucharRef(location(t)); }
    | LOOKAHEAD(2) t=<UNSIGNED> <SHORT>
        { return IntegerTypeRef.ushortRef(location(t)); }
    | LOOKAHEAD(2) t=<UNSIGNED> <INT>
        { return IntegerTypeRef.uintRef(location(t)); }
    | t=<UNSIGNED> <LONG>
        { return IntegerTypeRef.ulongRef(location(t)); }
    | t=<STRUCT> name=<IDENTIFIER>
        { return new StructTypeRef(location(t), name.image); }
    | t=<UNION> name=<IDENTIFIER>
        { return new UnionTypeRef(location(t), name.image); }
    | LOOKAHEAD({isType(getToken(1).image)}) name=<IDENTIFIER>
        // getToken(1) here is the first lookahead token, i.e. the next token
        // that has not been consumed yet.
        // Per the original note, isType returns true when its argument is a
        // type name that was defined by typedef.
        // In "(t)(1+2)", t may be a type name (making this a cast) or a
        // function pointer (making this a call). Every place where a
        // typedef'ed name may appear therefore needs this isType check.
        { return new UserTypeRef(location(name), name.image); }
}
// #@@}

6.2 语句的分析


// #@@range/stmts{
// Parses zero or more statements and returns them in source order.
// Statements for which stmt() returns null are left out of the list.
List<StmtNode> stmts():
{
    List<StmtNode> collected = new ArrayList<StmtNode>();
    StmtNode current;
}
{
    (
        current=stmt()
            {
                if (current != null) {
                    collected.add(current);
                }
            }
    )*
        {
            return collected;
        }
}
// #@@}

// #@@range/stmt{
// Parses a single statement and returns its node.
// Returns null for the empty statement ";", because that alternative never
// assigns n.
StmtNode stmt():
{
    StmtNode n = null;
    ExprNode e = null;
}
{
    ( ";"
    // A labeled statement and an expression statement can both begin with an
    // identifier; the second token (":" for a label) decides between them.
    | LOOKAHEAD(2) n=labeled_stmt()
    // Expression statement: wrap the expression, reusing its location.
    | e=expr() ";" { n = new ExprStmtNode(e.location(), e); }
    | n=block()
    | n=if_stmt()
    | n=while_stmt()
    | n=dowhile_stmt()
    | n=for_stmt()
    | n=switch_stmt()
    | n=break_stmt()
    | n=continue_stmt()
    | n=goto_stmt()
    | n=return_stmt()
    )
        {
            return n;
        }
}
// #@@}

// Parses a labeled statement:
//     <IDENTIFIER> ":" stmt
// The label name is the identifier's text; the node is located at the label.
LabelNode labeled_stmt():
{
    Token label;
    StmtNode target;
}
{
    label=<IDENTIFIER>
    ":"
    target=stmt()
        {
            return new LabelNode(location(label), label.image, target);
        }
}

// #@@range/if_stmt{
// Parses an if statement with an optional else branch:
//     <IF> "(" expr ")" stmt [<ELSE> stmt]
// The else body passed to IfNode is null when there is no else branch.
IfNode if_stmt():
{
    Token kw;
    ExprNode condition;
    StmtNode whenTrue, whenFalse = null;
}
{
    kw=<IF> "(" condition=expr() ")"
    whenTrue=stmt()
    // The explicit LOOKAHEAD(1) makes the optional else be taken whenever
    // <ELSE> is the next token, so an else attaches to the nearest if.
    [ LOOKAHEAD(1) <ELSE> whenFalse=stmt() ]
        {
            return new IfNode(location(kw), condition, whenTrue, whenFalse);
        }
}
// #@@}

// #@@range/while_stmt{
// Parses a while loop:
//     <WHILE> "(" expr ")" stmt
WhileNode while_stmt():
{
    Token kw;
    ExprNode condition;
    StmtNode loopBody;
}
{
    kw=<WHILE>
    "(" condition=expr() ")"
    loopBody=stmt()
        {
            return new WhileNode(location(kw), condition, loopBody);
        }
}
// #@@}

// Parses a do-while loop:
//     <DO> stmt <WHILE> "(" expr ")" ";"
// Note that DoWhileNode takes the body before the condition.
DoWhileNode dowhile_stmt():
{
    Token kw;
    ExprNode condition;
    StmtNode loopBody;
}
{
    kw=<DO>
    loopBody=stmt()
    <WHILE> "(" condition=expr() ")"
    ";"
        {
            return new DoWhileNode(location(kw), loopBody, condition);
        }
}

// Parses a for loop:
//     <FOR> "(" [expr] ";" [expr] ";" [expr] ")" stmt
// Each of the three header expressions is optional; an omitted one is
// passed to ForNode as null.
ForNode for_stmt():
{
    Token kw;
    ExprNode initExpr = null, condExpr = null, stepExpr = null;
    StmtNode loopBody;
}
{
    kw=<FOR>
    "("
        [ initExpr=expr() ] ";"
        [ condExpr=expr() ] ";"
        [ stepExpr=expr() ]
    ")"
    loopBody=stmt()
        {
            return new ForNode(location(kw), initExpr, condExpr, stepExpr, loopBody);
        }
}

// Parses a switch statement:
//     <SWITCH> "(" expr ")" "{" case_clauses "}"
SwitchNode switch_stmt():
{
    Token kw;
    ExprNode selector;
    List<CaseNode> clauses;
}
{
    kw=<SWITCH>
    "(" selector=expr() ")"
    "{" clauses=case_clauses() "}"
        {
            return new SwitchNode(location(kw), selector, clauses);
        }
}

// Parses the body of a switch: zero or more case clauses followed by an
// optional default clause. When present, the default clause is the last
// element of the returned list.
List<CaseNode> case_clauses():
{
    List<CaseNode> result = new ArrayList<CaseNode>();
    CaseNode clause;
}
{
    (
        clause=case_clause()
            { result.add(clause); }
    )*
    [
        clause=default_clause()
            { result.add(clause); }
    ]
        {
            return result;
        }
}

// Parses one case clause: one or more "case VALUE:" labels followed by the
// clause body. The node takes its location from the body.
CaseNode case_clause():
{
    List<ExprNode> labels;
    BlockNode clauseBody;
}
{
    labels=cases()
    clauseBody=case_body()
        {
            return new CaseNode(clauseBody.location(), labels, clauseBody);
        }
}

// Parses one or more consecutive case labels, each of the form
//     <CASE> primary ":"
// and returns the label values in source order.
List<ExprNode> cases():
{
    List<ExprNode> labels = new ArrayList<ExprNode>();
    ExprNode label;
}
{
    (
        <CASE> label=primary() ":"
            { labels.add(label); }
    )+
        {
            return labels;
        }
}

// Parses the default clause of a switch:
//     <DEFAULT_> ":" case_body
// It is represented as a CaseNode with an empty list of values.
CaseNode default_clause():
{
    BlockNode clauseBody;
}
{
    <DEFAULT_> ":"
    clauseBody=case_body()
        {
            return new CaseNode(clauseBody.location(),
                                new ArrayList<ExprNode>(),
                                clauseBody);
        }
}

// Parses the body of a case/default clause: one or more statements, the
// last of which must be a break statement.
// Returns a BlockNode holding the statements (with no local variables),
// located at the first statement.
// Throws ParseException when the body does not end with a break statement,
// including when it consists solely of empty statements.
BlockNode case_body():
{
    LinkedList<StmtNode> stmts = new LinkedList<StmtNode>();
    StmtNode s;
}
{
    (s=stmt() { if (s != null) stmts.add(s); })+
        {
            // last stmt of case clause must be break stmt.
            // stmt() returns null for an empty statement (";"), so the list
            // can be empty even though "+" matched at least once. Check for
            // that first: getLast() on an empty LinkedList would throw
            // NoSuchElementException instead of reporting a parse error.
            if (stmts.isEmpty() || !(stmts.getLast() instanceof BreakNode)) {
                throw new ParseException(
                  "missing break statement at the last of case clause");
            }
            return new BlockNode(stmts.get(0).location(),
                                 new ArrayList<DefinedVariable>(),
                                 stmts);
        }
}

// Parses a goto statement:
//     <GOTO> <IDENTIFIER> ";"
// The target is the identifier's text; the node is located at <GOTO>.
GotoNode goto_stmt():
{
    Token kw, target;
}
{
    kw=<GOTO>
    target=<IDENTIFIER>
    ";"
        {
            return new GotoNode(location(kw), target.image);
        }
}

// #@@range/break_stmt{
// Parses a break statement: <BREAK> ";"
BreakNode break_stmt():
{
    Token kw;
}
{
    kw=<BREAK> ";"
        {
            return new BreakNode(location(kw));
        }
}
// #@@}

// Parses a continue statement: <CONTINUE> ";"
ContinueNode continue_stmt():
{
    Token kw;
}
{
    kw=<CONTINUE> ";"
        {
            return new ContinueNode(location(kw));
        }
}

// Parses a return statement, with or without a value:
//     <RETURN> ";"
//     <RETURN> expr ";"
// With no value, the expression passed to ReturnNode is null.
ReturnNode return_stmt():
{
    Token kw;
    ExprNode value;
}
{
      // Both forms start with <RETURN>; the second token tells them apart.
      LOOKAHEAD(2)
      kw=<RETURN> ";"
        {
            // the case with no return value
            return new ReturnNode(location(kw), null);
        }
    | kw=<RETURN> value=expr() ";"
        {
            return new ReturnNode(location(kw), value);
        }
}

6.3 表达式的分析


// #@@range/expr{
// Parses an expression. Three forms, tried in this order:
//     term "=" expr            plain assignment        -> AssignNode
//     term opassign_op expr    compound assignment     -> OpAssignNode
//     expr10                   anything else
// The right-hand side of both assignment forms is expr() itself, so
// assignments nest to the right.
ExprNode expr():
{
    ExprNode lhs, rhs, expr;
    String op;
}
{
      // All three forms can begin with a term, so a syntactic lookahead
      // scans a whole term() and then checks for the assignment operator.
      LOOKAHEAD(term() "=")
      lhs=term() "=" rhs=expr()
        {
            return new AssignNode(lhs, rhs);
        }
    | LOOKAHEAD(term() opassign_op())
      lhs=term() op=opassign_op() rhs=expr()
        {
            return new OpAssignNode(lhs, op, rhs);
        }
    | expr=expr10()
        {
            return expr;
        }
}
// #@@}

// #@@range/opassign_op{
// Parses a compound assignment operator and returns the underlying binary
// operator as a string, i.e. the token text without its trailing "=".
String opassign_op(): {}
{
    (
        "+="
            { return "+"; }
      | "-="
            { return "-"; }
      | "*="
            { return "*"; }
      | "/="
            { return "/"; }
      | "%="
            { return "%"; }
      | "&="
            { return "&"; }
      | "|="
            { return "|"; }
      | "^="
            { return "^"; }
      | "<<="
            { return "<<"; }
      | ">>="
            { return ">>"; }
    )
}
// #@@}

// #@@range/expr10{
// Conditional expression level:
//     expr9 ["?" expr ":" expr10]
// Without the "?" part, the expr9 result is returned unchanged.
ExprNode expr10():
{
    ExprNode cond, whenTrue, whenFalse;
}
{
    cond=expr9()
    [
        "?" whenTrue=expr() ":" whenFalse=expr10()
            {
                return new CondExprNode(cond, whenTrue, whenFalse);
            }
    ]
        {
            return cond;
        }
}
// #@@}

// #@@range/expr9{
// Logical OR level: expr8 ("||" expr8)*
// Operands are folded into LogicalOrNode from left to right.
ExprNode expr9():
{
    ExprNode left, right;
}
{
    left=expr8()
    (
        "||" right=expr8()
            { left = new LogicalOrNode(left, right); }
    )*
        {
            return left;
        }
}
// #@@}

// #@@range/expr8{
// Logical AND level: expr7 ("&&" expr7)*
// Operands are folded into LogicalAndNode from left to right.
ExprNode expr8():
{
    ExprNode left, right;
}
{
    left=expr7()
    (
        "&&" right=expr7()
            { left = new LogicalAndNode(left, right); }
    )*
        {
            return left;
        }
}
// #@@}

// #@@range/expr7{
// Comparison level: expr6 ((">" | "<" | ">=" | "<=" | "==" | "!=") expr6)*
// Operands are folded into BinaryOpNode from left to right.
ExprNode expr7():
{
    ExprNode left, right;
}
{
    left=expr6()
    (
        ">"  right=expr6()
            { left = new BinaryOpNode(left, ">", right); }
      | "<"  right=expr6()
            { left = new BinaryOpNode(left, "<", right); }
      | ">=" right=expr6()
            { left = new BinaryOpNode(left, ">=", right); }
      | "<=" right=expr6()
            { left = new BinaryOpNode(left, "<=", right); }
      | "==" right=expr6()
            { left = new BinaryOpNode(left, "==", right); }
      | "!=" right=expr6()
            { left = new BinaryOpNode(left, "!=", right); }
    )*
        {
            return left;
        }
}
// #@@}

// #@@range/expr6{
// Bitwise OR level: expr5 ("|" expr5)*
// Operands are folded into BinaryOpNode from left to right.
ExprNode expr6():
{
    ExprNode left, right;
}
{
    left=expr5()
    (
        "|" right=expr5()
            { left = new BinaryOpNode(left, "|", right); }
    )*
        {
            return left;
        }
}
// #@@}

// #@@range/expr5{
// Bitwise XOR level: expr4 ("^" expr4)*
// Operands are folded into BinaryOpNode from left to right.
ExprNode expr5():
{
    ExprNode left, right;
}
{
    left=expr4()
    (
        "^" right=expr4()
            { left = new BinaryOpNode(left, "^", right); }
    )*
        {
            return left;
        }
}
// #@@}

// #@@range/expr4{
// Bitwise AND level: expr3 ("&" expr3)*
// Operands are folded into BinaryOpNode from left to right.
ExprNode expr4():
{
    ExprNode left, right;
}
{
    left=expr3()
    (
        "&" right=expr3()
            { left = new BinaryOpNode(left, "&", right); }
    )*
        {
            return left;
        }
}
// #@@}

// #@@range/expr3{
// Shift level: expr2 ((">>" | "<<") expr2)*
// Operands are folded into BinaryOpNode from left to right.
ExprNode expr3():
{
    ExprNode left, right;
}
{
    left=expr2()
    (
        ">>" right=expr2()
            { left = new BinaryOpNode(left, ">>", right); }
      | "<<" right=expr2()
            { left = new BinaryOpNode(left, "<<", right); }
    )*
        {
            return left;
        }
}
// #@@}

// #@@range/expr2{
// Additive level: expr1 (("+" | "-") expr1)*
// Operands are folded into BinaryOpNode from left to right.
ExprNode expr2():
{
    ExprNode left, right;
}
{
    left=expr1()
    (
        "+" right=expr1()
            { left = new BinaryOpNode(left, "+", right); }
      | "-" right=expr1()
            { left = new BinaryOpNode(left, "-", right); }
    )*
        {
            return left;
        }
}
// #@@}

// #@@range/expr1{
// Multiplicative level: term (("*" | "/" | "%") term)*
// Operands are folded into BinaryOpNode from left to right.
ExprNode expr1():
{
    ExprNode left, right;
}
{
    left=term()
    (
        "*" right=term()
            { left = new BinaryOpNode(left, "*", right); }
      | "/" right=term()
            { left = new BinaryOpNode(left, "/", right); }
      | "%" right=term()
            { left = new BinaryOpNode(left, "%", right); }
    )*
        {
            return left;
        }
}
// #@@}

exprN中的N代表优先级:expr1优先级最高(结合最紧),expr10优先级最低。这里要注意不要写成左递归,即产生式的第一个符号就是它自身,例如 exprX(): {} { exprX() Y() Z() { /* do sth */ } }。JavaCC生成的是递归下降解析器,这样写会使exprX()在不消耗任何token的情况下再次调用自身,从而无限递归。

6.4 项的分析


// #@@range/term{
// Parses a term: either a cast, "(" type ")" term, or a unary expression.
ExprNode term():
{
    TypeNode t;
    ExprNode n;
}
{
      // A cast and a parenthesized expression both start with "("; the
      // syntactic lookahead takes the cast form only when a type follows.
      LOOKAHEAD("(" type())
      "(" t=type() ")" n=term()     { return new CastNode(t, n); }
    | n=unary()                     { return n; }
}
// #@@}

// #@@range/unary{
// Parses a unary expression: prefix ++/--, the unary operators
// + - ! ~ * &, the two sizeof forms, or a postfix expression.
// The operand of prefix ++/-- is unary(); the operand of the other prefix
// operators is term(), so it may itself be a cast.
ExprNode unary():
{
    ExprNode n;
    TypeNode t;
}
{
      "++" n=unary()    { return new PrefixOpNode("++", n); }
    | "--" n=unary()    { return new PrefixOpNode("--", n); }
    | "+" n=term()      { return new UnaryOpNode("+", n); }
    | "-" n=term()      { return new UnaryOpNode("-", n); }
    | "!" n=term()      { return new UnaryOpNode("!", n); }
    | "~" n=term()      { return new UnaryOpNode("~", n); }
    | "*" n=term()      { return new DereferenceNode(n); }
    | "&" n=term()      { return new AddressNode(n); }
    // Both sizeof forms start with <SIZEOF>; three tokens of lookahead
    // (<SIZEOF>, "(", and the first token of a type) select sizeof(type).
    | LOOKAHEAD(3) <SIZEOF> "(" t=type() ")"
        {
            return new SizeofTypeNode(t, size_t());
        }
    | <SIZEOF> n=unary()
        {
            return new SizeofExprNode(n, size_t());
        }
    | n=postfix()       { return n; }
}
// #@@}

// #@@range/postfix{
// Parses a primary expression followed by zero or more postfix operations:
// suffix ++/--, array indexing, member access through "." or "->", and
// function call. Each operation wraps the expression built so far, so they
// apply from left to right.
ExprNode postfix():
{
    ExprNode node, index;
    String member;
    List<ExprNode> actuals;
}
{
    node=primary()
    (
        "++"
            { node = new SuffixOpNode("++", node); }
      | "--"
            { node = new SuffixOpNode("--", node); }
      | "[" index=expr() "]"
            { node = new ArefNode(node, index); }
      | "." member=name()
            { node = new MemberNode(node, member); }
      | "->" member=name()
            { node = new PtrMemberNode(node, member); }
      | "(" actuals=args() ")"
            { node = new FuncallNode(node, actuals); }
    )*
        {
            return node;
        }
}
// #@@}

// #@@range/name{
// Parses a single identifier and returns its text.
String name():
{
    Token ident;
}
{
    ident=<IDENTIFIER>
        {
            return ident.image;
        }
}
// #@@}

// #@@range/args{
// Parses the (possibly empty) comma-separated argument list of a function
// call and returns the argument expressions in source order.
List<ExprNode> args():
{
    List<ExprNode> list = new ArrayList<ExprNode>();
    ExprNode item;
}
{
    [
        item=expr()
            { list.add(item); }
        (
            "," item=expr()
                { list.add(item); }
        )*
    ]
        {
            return list;
        }
}
// #@@}

// #@@range/primary{
// Parses a primary expression: an integer, character or string literal, a
// variable reference, or a parenthesized expression. For the parenthesized
// form the inner expression's node is returned as is.
ExprNode primary():
{
    Token tok;
    ExprNode inner;
}
{
      tok=<INTEGER>
        {
            return integerNode(location(tok), tok.image);
        }
    | tok=<CHARACTER>
        {
            // A character literal becomes an integer literal of char type.
            return new IntegerLiteralNode(
                location(tok),
                IntegerTypeRef.charRef(),
                characterCode(tok.image));
        }
    | tok=<STRING>
        {
            // A string literal is typed as pointer to char.
            return new StringLiteralNode(
                location(tok),
                new PointerTypeRef(IntegerTypeRef.charRef()),
                stringValue(tok.image));
        }
    | tok=<IDENTIFIER>
        {
            return new VariableNode(location(tok), tok.image);
        }
    | "(" inner=expr() ")"
        {
            return inner;
        }
}
// #@@}

这里primary是最小的语法单位,可以代表字面量(Literal)、变量的引用等。注意这里将括号表达式 "(" expr() ")" 也当作primary来看待,这样带括号的表达式就能出现在任何需要primary的位置,例如用括号改变运算的优先级,或者对括号表达式的结果再做下标、成员访问和函数调用。

posted @ 2021-02-05 17:54  雪溯  阅读(251)  评论(0编辑  收藏  举报