// C99 lexical elements - ANTLR grammar

/*
 ============================================================================
 Name        : CTokens.g
 Author      : luqi
 Version     : 0.1
 Copyright   : Your copyright notice
 Description : C99 lexer - tested against <C99.pdf 6.4> (Lexical elements)
 ============================================================================
 */
grammar CTokens;

// Code-generation options for this combined (parser + lexer) grammar.
options {
  // Emit Java sources.
  language = Java;
  // Base class for the generated recognizer; DebugParser is imported in @header.
  superClass = DebugParser;
    // Disabled alternative kept by the author: use DebugLexer as the base class instead.
    //@ superClass = DebugLexer;
}

@header
{
package c99.ctokens;
import util.DebugParser;
}

@lexer::header
{
package c99.ctokens;
import util.DebugLexer;
}

// Entry rule: recognizes a whole input made of one or more lexical tokens.
// The explicit EOF forces the parser to consume everything; with a single
// `token` and no EOF, parsing stopped after the first token and any
// remaining input was silently ignored.
prog
    : token+ EOF
    ;
 
 
// Matches exactly one lexical token and prints its category followed by
// its matched text to standard output.
token
    : kw=KEYWORD
        { System.out.println("Meet KEYWORD: " + $kw.text); }
    | id=IDENTIFIER
        { System.out.println("Meet IDENTIFIER: " + $id.text); }
    | c=CONSTANT
        { System.out.println("Meet CONSTANT: " + $c.text); }
    | s=STRING_LITERAL
        { System.out.println("Meet STRING_LITERAL: " + $s.text); }
    //| PUNCTUATOR
    ;
        
// Thin parser-level wrappers: each rule matches a single token of the
// corresponding lexer category.
keyword
    : KEYWORD
    ;

identifier
    : IDENTIFIER
    ;

constant
    : CONSTANT
    ;

string_literal
    : STRING_LITERAL
    ;
                
/*
==========================================================================================================
*/
        
// The 37 C99 reserved words, all reported under a single KEYWORD token type.
KEYWORD
    : 'auto'     | 'break'    | 'case'     | 'char'
    | 'const'    | 'continue' | 'default'  | 'do'
    | 'double'   | 'else'     | 'enum'     | 'extern'
    | 'float'    | 'for'      | 'goto'     | 'if'
    | 'inline'   | 'int'      | 'long'     | 'register'
    | 'restrict' | 'return'   | 'short'    | 'signed'
    | 'sizeof'   | 'static'   | 'struct'   | 'switch'
    | 'typedef'  | 'union'    | 'unsigned' | 'void'
    | 'volatile' | 'while'    | '_Bool'    | '_Complex'
    | '_Imaginary'
    ;
            
// An identifier starts with a non-digit and continues with any mix of
// non-digits and decimal digits.
IDENTIFIER
    : IDENTIFIER_NONDIGIT ( DIGIT | IDENTIFIER_NONDIGIT )*
    ;
            
// Identifier character that is not a digit: a plain letter/underscore or a
// universal character name (\uXXXX / \UXXXXXXXX).
fragment
IDENTIFIER_NONDIGIT
    : NONDIGIT
    | UNIVERSAL_CHARACTER_NAME
    ;
                    
// ASCII letter or underscore.
fragment
NONDIGIT
    : ( 'a'..'z' | 'A'..'Z' | '_' )
    ;
// Single decimal digit.
fragment
DIGIT
    : '0'..'9'
    ;

// Universal character name: backslash, then 'u' with four hex digits or
// 'U' with eight hex digits.
fragment
UNIVERSAL_CHARACTER_NAME
    : '\\' ( 'u' HEX_QUAD
           | 'U' HEX_QUAD HEX_QUAD
           )
    ;
     
// Exactly four hexadecimal digits; building block of universal character names.
// Declared as a fragment so it is only usable from other lexer rules. Without
// `fragment` it was a standalone token type that competed with CONSTANT and
// IDENTIFIER for inputs such as `1234` or `abcd`.
fragment
HEX_QUAD
    : HEXADECIMAL_DIGIT HEXADECIMAL_DIGIT HEXADECIMAL_DIGIT HEXADECIMAL_DIGIT
    ;
            
// Any C constant: integer, floating or character. Enumeration constants are
// lexically identifiers and therefore stay disabled here.
CONSTANT
    : INTEGER_CONSTANT
    | FLOATING_CONSTANT
    //| ENUMERATION_CONSTANT
    | CHARACTER_CONSTANT
    ;
            
// Decimal, octal or hexadecimal integer with an optional type suffix.
fragment
INTEGER_CONSTANT
    : ( DECIMAL_CONSTANT
      | OCTAL_CONSTANT
      | HEXADECIMAL_CONSTANT
      )
      INTEGER_SUFFIX?
    ;
// Integer type suffix: an unsigned marker optionally followed by l/ll, or
// l/ll optionally followed by an unsigned marker.
fragment
INTEGER_SUFFIX
    : UNSIGNED_SUFFIX ( LONG_SUFFIX | LONG_LONG_SUFFIX )?
    | LONG_SUFFIX UNSIGNED_SUFFIX?
    | LONG_LONG_SUFFIX UNSIGNED_SUFFIX?
    ;
// 'u' or 'U'.
fragment
UNSIGNED_SUFFIX
    : ( 'u' | 'U' )
    ;
// 'l' or 'L'.
fragment
LONG_SUFFIX
    : ( 'l' | 'L' )
    ;
// 'll' or 'LL'; mixed case such as 'lL' is deliberately not accepted.
fragment
LONG_LONG_SUFFIX
    : 'll'
    | 'LL'
    ;
// Decimal integer: a non-zero leading digit followed by any digits.
fragment
DECIMAL_CONSTANT
    : NONZERO_DIGIT DIGIT*
    ;
// Octal integer: a leading '0' followed by any octal digits (so a bare "0"
// is an octal constant).
fragment
OCTAL_CONSTANT
    : '0' OCTAL_DIGIT*
    ;
// Hexadecimal integer: 0x/0X prefix followed by at least one hex digit.
fragment
HEXADECIMAL_CONSTANT
    : HEXADECIMAL_PREFIX HEXADECIMAL_DIGIT+
    ;
 

// Hexadecimal prefix "0x" or "0X".
// Declared as a fragment: it is only a building block of hexadecimal
// constants. Without `fragment`, a bare "0x" was emitted as a token of its own.
fragment
HEXADECIMAL_PREFIX
    : '0' ( 'x' | 'X' )
    ;
// Decimal digit other than zero.
fragment
NONZERO_DIGIT
    : '1'..'9'
    ;
// Single octal digit.
fragment
OCTAL_DIGIT
    : '0'..'7'
    ;
            
// Single hexadecimal digit, either letter case.
fragment
HEXADECIMAL_DIGIT
    : ( '0'..'9' | 'a'..'f' | 'A'..'F' )
    ;

// Floating constant in decimal or hexadecimal notation.
fragment
FLOATING_CONSTANT
    : DECIMAL_FLOATING_CONSTANT
    | HEXADECIMAL_FLOATING_CONSTANT
    ;
                    
// Decimal floating constant: either a fractional part with an optional
// exponent, or a plain digit sequence with a mandatory exponent; both forms
// accept an optional f/l suffix.
fragment
DECIMAL_FLOATING_CONSTANT
    : ( FRACTIONAL_CONSTANT EXPONENT_PART?
      | DIGIT_SEQUENCE EXPONENT_PART
      )
      FLOATING_SUFFIX?
    ;
// Digits around a decimal point; at least one side must be present
// ("1.5", ".5", "1.").
fragment
FRACTIONAL_CONSTANT
    : DIGIT_SEQUENCE '.' DIGIT_SEQUENCE?
    | '.' DIGIT_SEQUENCE
    ;
                    
// Decimal exponent: 'e' or 'E', an optional sign, then one or more digits.
// Declared as a fragment: it is only meaningful inside a floating constant.
// Without `fragment`, input such as "e+5" was emitted as a token of its own.
fragment
EXPONENT_PART
    : ( 'e' | 'E' ) SIGN? DIGIT_SEQUENCE
    ;
// Exponent sign.
fragment
SIGN
    : ( '+' | '-' )
    ;
        
// One or more decimal digits.
fragment
DIGIT_SEQUENCE
    : ( DIGIT )+
    ;
// Hexadecimal floating constant: 0x/0X prefix, a hex fraction or hex digit
// sequence, a mandatory binary exponent, and an optional f/l suffix.
fragment
HEXADECIMAL_FLOATING_CONSTANT
    : HEXADECIMAL_PREFIX
      ( HEXADECIMAL_FRACTIONAL_CONSTANT
      | HEXADECIMAL_DIGIT_SEQUENCE
      )
      BINARY_EXPONENT_PART
      FLOATING_SUFFIX?
    ;
                                
// Hex digits around a '.', with at least one side present ("a.8", ".8", "a.").
// Declared as a fragment: it is only a component of hexadecimal floating
// constants. Without `fragment` it was a standalone token type that could
// capture inputs such as "1." or ".5" ahead of CONSTANT.
fragment
HEXADECIMAL_FRACTIONAL_CONSTANT
    : HEXADECIMAL_DIGIT_SEQUENCE? '.' HEXADECIMAL_DIGIT_SEQUENCE
    | HEXADECIMAL_DIGIT_SEQUENCE '.'
    ;
        
// Binary exponent of a hexadecimal floating constant: 'p' or 'P', an optional
// sign, then one or more decimal digits.
// Declared as a fragment so that text such as "p+3" is not emitted as a
// token of its own.
fragment
BINARY_EXPONENT_PART
    : ( 'p' | 'P' ) SIGN? DIGIT_SEQUENCE
    ;
                        
// One or more hexadecimal digits.
// Declared as a fragment: it is a helper for hexadecimal floating constants.
// Without `fragment` it was a standalone token type overlapping IDENTIFIER
// and CONSTANT (for example "1a" was emitted as a single token).
fragment
HEXADECIMAL_DIGIT_SEQUENCE
    : HEXADECIMAL_DIGIT+
    ;
                            
// Floating type suffix: f/F or l/L.
fragment
FLOATING_SUFFIX
    : ( 'f' | 'F' | 'l' | 'L' )
    ;

//fragment       
//ENUMERATION_CONSTANT    :   IDENTIFIER
//                        ;
 
// Character constant: one or more c-chars in single quotes, optionally
// introduced by the wide-character prefix L.
fragment
CHARACTER_CONSTANT
    : 'L'? '\'' C_CHAR_SEQUENCE '\''
    ;
// Non-empty run of c-chars.
fragment
C_CHAR_SEQUENCE
    : ( C_CHAR )+
    ;
// One element of a character constant: an escape sequence, or any character
// other than a single quote, a backslash or a line break.
// C99 6.4.4.4 excludes new-line from c-char; excluding '\n' and '\r' here
// stops an unterminated character constant from swallowing following lines.
fragment
C_CHAR
    : ~( '\'' | '\\' | '\n' | '\r' )
    | ESCAPE_SEQUENCE
    ;
        
// Any escape sequence allowed inside character constants and string literals:
// simple, octal, hexadecimal, or a universal character name.
// Declared as a fragment: escapes are only valid inside literals. Without
// `fragment`, a stray backslash sequence in the input (e.g. \n written as two
// characters) was emitted as a token of its own.
fragment
ESCAPE_SEQUENCE
    : SIMPLE_ESCAPE_SEQUENCE
    | OCTAL_ESCAPE_SEQUENCE
    | HEXADECIMAL_ESCAPE_SEQUENCE
    | UNIVERSAL_CHARACTER_NAME
    ;
                
// Backslash followed by one of the single-character escape codes.
fragment
SIMPLE_ESCAPE_SEQUENCE
    : '\\'
      ( '\'' | '"' | '?' | '\\'
      | 'a'  | 'b' | 'f' | 'n'
      | 'r'  | 't' | 'v'
      )
    ;
// Octal escape: a backslash followed by one, two or three octal digits.
// The three lengths are spelled out as separate alternatives; the last one is
// guarded by a syntactic predicate that checks for a full three-digit escape
// before that alternative is taken.
fragment
OCTAL_ESCAPE_SEQUENCE   :  '\\'  OCTAL_DIGIT                  //  compact form not used: OCTAL_DIGIT OCTAL_DIGIT ?  OCTAL_DIGIT ?
                        |  '\\'  OCTAL_DIGIT OCTAL_DIGIT
                        |  ('\\' OCTAL_DIGIT OCTAL_DIGIT  OCTAL_DIGIT )=> '\\'  OCTAL_DIGIT OCTAL_DIGIT  OCTAL_DIGIT
                        ;
   
// Hexadecimal escape: backslash, 'x', then one or more hex digits.
fragment
HEXADECIMAL_ESCAPE_SEQUENCE
    : '\\' 'x' ( HEXADECIMAL_DIGIT )+
    ;
 
// String literal: a possibly empty run of s-chars in double quotes,
// optionally introduced by the wide-string prefix L.
STRING_LITERAL
    : 'L'? '"' S_CHAR_SEQUENCE? '"'
    ;
 
// Non-empty run of s-chars.
fragment
S_CHAR_SEQUENCE
    : ( S_CHAR )+
    ;

// One element of a string literal: an escape sequence, or any character other
// than a double quote, a backslash or a line break.
// C99 6.4.5 excludes new-line from s-char; excluding '\n' and '\r' here stops
// an unterminated string literal from swallowing following lines.
fragment
S_CHAR
    : ~( '"' | '\\' | '\n' | '\r' )
    | ESCAPE_SEQUENCE
    ;


// Line comment: "//" up to, and including, an optional line terminator
// (\n, \r or \r\n). Routed to the hidden channel so the parser never sees it.
SINGLELINECOMMENT
    : '//'
      ( ~( '\n' | '\r' ) )*
      ( '\r' ( '\n' )?
      | '\n'
      )?
      { $channel=HIDDEN; }
    ;
                    
                    
// Block comment: everything from "/*" to the first following "*/" (the inner
// loop is non-greedy). Routed to the hidden channel.
MULTILINECOMMENT
    : '/*'
      ( options { greedy=false; } : . )*
      '*/'
      { $channel=HIDDEN; }
    ;
      
                    
// A single white-space character, routed to the hidden channel.
// Vertical tab (\u000B) is included: C99 lists it among the white-space
// characters, and it was previously rejected by the lexer.
WS
    : ( ' ' | '\t' | '\n' | '\r' | '\u000B' | '\u000C' )
      { $channel=HIDDEN; }
    ;
    
    
    
    
        
            
            

        

// posted @ 2011-11-30 18:36 by lovegq - views (447), comments (0)