antlr-代码分析(1)

grammar文件

grammar LabeledExpr; // rename to distinguish from Expr.g4

prog:   stat+ ;

stat:   expr NEWLINE                # printExpr
    |   ID '=' expr NEWLINE         # assign
    |   NEWLINE                     # blank
    ;

expr:   expr op=('*'|'/') expr      # MulDiv
    |   expr op=('+'|'-') expr      # AddSub
    |   INT                         # int
    |   ID                          # id
    |   '(' expr ')'                # parens
    ;

MUL :   '*' ; // assigns token name to '*' used above in grammar
DIV :   '/' ;
ADD :   '+' ;
SUB :   '-' ;
ID  :   [a-zA-Z]+ ;      // match identifiers
INT :   [0-9]+ ;         // match integers
NEWLINE:'\r'? '\n' ;     // return newlines to parser (is end-statement signal)
WS  :   [ \t]+ -> skip ; // toss out whitespace

语法文件会生成LabeledExpr.tokens、LabeledExprLexer、LabeledExprParser

tokens

其中tokens代表所有的符号得多种定义方式

T__0=1
T__1=2
T__2=3
MUL=4
DIV=5
ADD=6
SUB=7
ID=8
INT=9
NEWLINE=10
WS=11
'='=1
'('=2
')'=3
'*'=4
'/'=5
'+'=6
'-'=7

比如这里的T__0、=是一个符号；
这里的MUL、*是一个符号；

对于例子

a=1
b=2
c=3
d=a+b*c

生成的解析树如下

LabeledExprLexer

继承关系：LabeledExprLexer->Lexer->Recognizer
详细描述一下LabeledExprLexer

public class LabeledExprLexer extends Lexer {
	protected static final DFA[] _decisionToDFA;
	protected static final PredictionContextCache _sharedContextCache =
		new PredictionContextCache();
	//规则名对应的数字编号
	public static final int
		T__0=1, T__1=2, T__2=3, MUL=4, DIV=5, ADD=6, SUB=7, ID=8, INT=9, NEWLINE=10, 
		WS=11;
	public static String[] modeNames = {
		"DEFAULT_MODE"
	};
	//规则名(全部的)
	public static final String[] ruleNames = {
		"T__0", "T__1", "T__2", "MUL", "DIV", "ADD", "SUB", "ID", "INT", "NEWLINE", 
		"WS"
	};
	//词法对应的符号
	private static final String[] _LITERAL_NAMES = {
		null, "'='", "'('", "')'", "'*'", "'/'", "'+'", "'-'"
	};
	//后面定义的词法字母
	private static final String[] _SYMBOLIC_NAMES = {
		null, null, null, null, "MUL", "DIV", "ADD", "SUB", "ID", "INT", "NEWLINE", 
		"WS"
	};
	//VocabularyImpl的字面名和符号名都在这里传入，符号名就是词法规则前面的内容
	public static final Vocabulary VOCABULARY = new VocabularyImpl(_LITERAL_NAMES, _SYMBOLIC_NAMES);
	public static final String[] tokenNames;

	//初始化tokenNames
	static {
		tokenNames = new String[_SYMBOLIC_NAMES.length];
		for (int i = 0; i < tokenNames.length; i++) {
			//先获取字面名，再获取符号名
			tokenNames[i] = VOCABULARY.getLiteralName(i);
			if (tokenNames[i] == null) {
				tokenNames[i] = VOCABULARY.getSymbolicName(i);
			}

			if (tokenNames[i] == null) {
				tokenNames[i] = "<INVALID>";
			}
		}
	}

	@Override
	@Deprecated
	public String[] getTokenNames() {
		return tokenNames;
	}

	@Override
	public Vocabulary getVocabulary() {
		return VOCABULARY;
	}

	public LabeledExprLexer(CharStream input) {
		super(input);
		_interp = new LexerATNSimulator(this,_ATN,_decisionToDFA,_sharedContextCache);
	}

	@Override
	public String getGrammarFileName() { return "LabeledExpr.g4"; }

	@Override
	public String[] getRuleNames() { return ruleNames; }

	@Override
	public String getSerializedATN() { return _serializedATN; }

	@Override
	public String[] getModeNames() { return modeNames; }

	@Override
	public ATN getATN() { return _ATN; }
		public static final String _serializedATN =
		"\3\u0430\ud6d1\u8206\uad2d\u4417\uaef1\u8d80\uaadd\2\r=\b\1\4\2\t\2\4"+
		"\3\t\3\4\4\t\4\4\5\t\5\4\6\t\6\4\7\t\7\4\b\t\b\4\t\t\t\4\n\t\n\4\13\t"+
		"\13\4\f\t\f\3\2\3\2\3\3\3\3\3\4\3\4\3\5\3\5\3\6\3\6\3\7\3\7\3\b\3\b\3"+
		"\t\6\t)\n\t\r\t\16\t*\3\n\6\n.\n\n\r\n\16\n/\3\13\5\13\63\n\13\3\13\3"+
		"\13\3\f\6\f8\n\f\r\f\16\f9\3\f\3\f\2\2\r\3\3\5\4\7\5\t\6\13\7\r\b\17\t"+
		"\21\n\23\13\25\f\27\r\3\2\5\4\2C\\c|\3\2\62;\4\2\13\13\"\"@\2\3\3\2\2"+
		"\2\2\5\3\2\2\2\2\7\3\2\2\2\2\t\3\2\2\2\2\13\3\2\2\2\2\r\3\2\2\2\2\17\3"+
		"\2\2\2\2\21\3\2\2\2\2\23\3\2\2\2\2\25\3\2\2\2\2\27\3\2\2\2\3\31\3\2\2"+
		"\2\5\33\3\2\2\2\7\35\3\2\2\2\t\37\3\2\2\2\13!\3\2\2\2\r#\3\2\2\2\17%\3"+
		"\2\2\2\21(\3\2\2\2\23-\3\2\2\2\25\62\3\2\2\2\27\67\3\2\2\2\31\32\7?\2"+
		"\2\32\4\3\2\2\2\33\34\7*\2\2\34\6\3\2\2\2\35\36\7+\2\2\36\b\3\2\2\2\37"+
		" \7,\2\2 \n\3\2\2\2!\"\7\61\2\2\"\f\3\2\2\2#$\7-\2\2$\16\3\2\2\2%&\7/"+
		"\2\2&\20\3\2\2\2\')\t\2\2\2(\'\3\2\2\2)*\3\2\2\2*(\3\2\2\2*+\3\2\2\2+"+
		"\22\3\2\2\2,.\t\3\2\2-,\3\2\2\2./\3\2\2\2/-\3\2\2\2/\60\3\2\2\2\60\24"+
		"\3\2\2\2\61\63\7\17\2\2\62\61\3\2\2\2\62\63\3\2\2\2\63\64\3\2\2\2\64\65"+
		"\7\f\2\2\65\26\3\2\2\2\668\t\4\2\2\67\66\3\2\2\289\3\2\2\29\67\3\2\2\2"+
		"9:\3\2\2\2:;\3\2\2\2;<\b\f\2\2<\30\3\2\2\2\7\2*/\629\3\b\2\2";
	public static final ATN _ATN =
		new ATNDeserializer().deserialize(_serializedATN.toCharArray());
	static {
		_decisionToDFA = new DFA[_ATN.getNumberOfDecisions()];
		for (int i = 0; i < _ATN.getNumberOfDecisions(); i++) {
			_decisionToDFA[i] = new DFA(_ATN.getDecisionState(i), i);
		}
	}
}

VocabularyImpl
VocabularyImpl->>Vocabulary
提供了Vocabulary接口的默认实现
vocabulary接口提供了识别器需要的词汇信息
Vocabulary接口内容如

public interface Vocabulary {
	int getMaxTokenType();						//返回最大值
	String getLiteralName(int tokenType);		//返回一个tokenType数字对应的字符串
	String getSymbolicName(int tokenType);		//返回符号名
	String getDisplayName(int tokenType);
}

VocabularyImpl的实现
设置literalNames和symbolicNames并将displayNames设置为空、
maxTokenType为literalNames和symbolicNames长度的最大值

public class VocabularyImpl implements Vocabulary {
	private static final String[] EMPTY_NAMES = new String[0];
	public static final VocabularyImpl EMPTY_VOCABULARY = new VocabularyImpl(EMPTY_NAMES, EMPTY_NAMES, EMPTY_NAMES);
	private final String[] literalNames;
	private final String[] symbolicNames;
	private final String[] displayNames;
	private final int maxTokenType;
	//构造1
	public VocabularyImpl(String[] literalNames, String[] symbolicNames) {
		this(literalNames, symbolicNames, null);
	}
	//构造2
	public VocabularyImpl(String[] literalNames, String[] symbolicNames, String[] displayNames) {
		this.literalNames = literalNames != null ? literalNames : EMPTY_NAMES;
		this.symbolicNames = symbolicNames != null ? symbolicNames : EMPTY_NAMES;
		this.displayNames = displayNames != null ? displayNames : EMPTY_NAMES;
		// See note here on -1 part: https://github.com/antlr/antlr4/pull/1146
		this.maxTokenType =
			Math.max(this.displayNames.length,
					 Math.max(this.literalNames.length, this.symbolicNames.length)) - 1;
	}

	public static Vocabulary fromTokenNames(String[] tokenNames) {
	if (tokenNames == null || tokenNames.length == 0) {
		return EMPTY_VOCABULARY;
	}

	String[] literalNames = Arrays.copyOf(tokenNames, tokenNames.length);
	String[] symbolicNames = Arrays.copyOf(tokenNames, tokenNames.length);
	for (int i = 0; i < tokenNames.length; i++) {
		String tokenName = tokenNames[i];
		if (tokenName == null) {
			continue;
		}

		if (!tokenName.isEmpty()) {
			char firstChar = tokenName.charAt(0);
			if (firstChar == '\'') {
				symbolicNames[i] = null;
				continue;
			}
			else if (Character.isUpperCase(firstChar)) {
				literalNames[i] = null;
				continue;
			}
		}

		// wasn't a literal or symbolic name
		literalNames[i] = null;
		symbolicNames[i] = null;
	}

		return new VocabularyImpl(literalNames, symbolicNames, tokenNames);
	}

	@Override
	public int getMaxTokenType() {
		return maxTokenType;
	}

	@Override
	public String getLiteralName(int tokenType) {
		if (tokenType >= 0 && tokenType < literalNames.length) {
			return literalNames[tokenType];
		}
		return null;
	}

	@Override
	public String getSymbolicName(int tokenType) {
		if (tokenType >= 0 && tokenType < symbolicNames.length) {
			return symbolicNames[tokenType];
		}

		if (tokenType == Token.EOF) {
			return "EOF";
		}

		return null;
	}

	@Override
	public String getDisplayName(int tokenType) {
		if (tokenType >= 0 && tokenType < displayNames.length) {
			String displayName = displayNames[tokenType];
			if (displayName != null) {
				return displayName;
			}
		}

		String literalName = getLiteralName(tokenType);
		if (literalName != null) {
			return literalName;
		}

		String symbolicName = getSymbolicName(tokenType);
		if (symbolicName != null) {
			return symbolicName;
		}
		return Integer.toString(tokenType);
	}
}

现在看下识别器Recognizer

Lexer和parser都是继承自Recognizer

public abstract class Recognizer<Symbol, ATNInterpreter extends ATNSimulator> {
	public static final int EOF=-1;	
	private static final Map<Vocabulary, Map<String, Integer>> tokenTypeMapCache =
		new WeakHashMap<Vocabulary, Map<String, Integer>>();
	private static final Map<String[], Map<String, Integer>> ruleIndexMapCache =
		new WeakHashMap<String[], Map<String, Integer>>();
	private List<ANTLRErrorListener> _listeners =
		new CopyOnWriteArrayList<ANTLRErrorListener>() {{
			add(ConsoleErrorListener.INSTANCE);
		}};

	protected ATNInterpreter _interp;

	private int _stateNumber = -1;
	public abstract String[] getTokenNames();
	public abstract String[] getRuleNames();
	public Vocabulary getVocabulary() {
		return VocabularyImpl.fromTokenNames(getTokenNames());
	}
	//tokenname到tokentype的map
	public Map<String, Integer> getTokenTypeMap() {
		Vocabulary vocabulary = getVocabulary();
		synchronized (tokenTypeMapCache) {
			Map<String, Integer> result = tokenTypeMapCache.get(vocabulary);
			if (result == null) {
				result = new HashMap<String, Integer>();
				for (int i = 0; i < getATN().maxTokenType; i++) {
					String literalName = vocabulary.getLiteralName(i);
					if (literalName != null) {
						result.put(literalName, i);
					}

					String symbolicName = vocabulary.getSymbolicName(i);
					if (symbolicName != null) {
						result.put(symbolicName, i);
					}
				}

				result.put("EOF", Token.EOF);
				result = Collections.unmodifiableMap(result);
				tokenTypeMapCache.put(vocabulary, result);
			}

			return result;
		}
	}

	//获取一个名字，从rule名到rule索引
	public Map<String, Integer> getRuleIndexMap() {
		String[] ruleNames = getRuleNames();
		if (ruleNames == null) {
			throw new UnsupportedOperationException("The current recognizer does not provide a list of rule names.");
		}

		synchronized (ruleIndexMapCache) {
			Map<String, Integer> result = ruleIndexMapCache.get(ruleNames);
			if (result == null) {
				result = Collections.unmodifiableMap(Utils.toMap(ruleNames));
				ruleIndexMapCache.put(ruleNames, result);
			}

			return result;
		}
	}
	
	public int getTokenType(String tokenName) {
		Integer ttype = getTokenTypeMap().get(tokenName);
		if ( ttype!=null ) return ttype;
		return Token.INVALID_TYPE;
	}
	//序列化的ATN
	public String getSerializedATN() {
		throw new UnsupportedOperationException("there is no serialized ATN");
	}
	
	public abstract String getGrammarFileName();
	//返回ATN
	public abstract ATN getATN();
	//获取ATN解释器
	public ATNInterpreter getInterpreter() {
		return _interp;
	}
	//在parse/lex阶段，返回执行过程
	public ParseInfo getParseInfo() {
		return null;
	}
	//设置ATN解释器
	public void setInterpreter(ATNInterpreter interpreter) {
		_interp = interpreter;
	}

	//错误header错误的位置
	public String getErrorHeader(RecognitionException e) {
		int line = e.getOffendingToken().getLine();
		int charPositionInLine = e.getOffendingToken().getCharPositionInLine();
		return "line "+line+":"+charPositionInLine;
	}

	public String getTokenErrorDisplay(Token t) {
		if ( t==null ) return "<no token>";
		String s = t.getText();
		if ( s==null ) {
			if ( t.getType()==Token.EOF ) {
				s = "<EOF>";
			}
			else {
				s = "<"+t.getType()+">";
			}
		}
		s = s.replace("\n","\\n");
		s = s.replace("\r","\\r");
		s = s.replace("\t","\\t");
		return "'"+s+"'";
	}

	public void addErrorListener(ANTLRErrorListener listener) {
		if (listener == null) {
			throw new NullPointerException("listener cannot be null.");
		}

		_listeners.add(listener);
	}
	public void removeErrorListener(ANTLRErrorListener listener) {
		_listeners.remove(listener);
	}

	public void removeErrorListeners() {
		_listeners.clear();
	}
	public List<? extends ANTLRErrorListener> getErrorListeners() {
		return _listeners;
	}
	public ANTLRErrorListener getErrorListenerDispatch() {
		return new ProxyErrorListener(getErrorListeners());
	}

	//如果ATN需要执行sempreds或者actions，子类需要覆盖下面的内容
	public boolean sempred(RuleContext _localctx, int ruleIndex, int actionIndex) {
		return true;
	}

	public boolean precpred(RuleContext localctx, int precedence) {
		return true;
	}

	public void action(RuleContext _localctx, int ruleIndex, int actionIndex) {
	}

	public final int getState() {
		return _stateNumber;
	}
	public final void setState(int atnState) {
			_stateNumber = atnState;
	}
	//getInputStream?
	public abstract IntStream getInputStream();
	public abstract void setInputStream(IntStream input);
	//getTokenFactory?
	public abstract TokenFactory<?> getTokenFactory();
	public abstract void setTokenFactory(TokenFactory<?> input);
}

posted @ 2016-12-13 14:28 zhangshihai1232 阅读(671) 评论(0) 编辑收藏举报

会员力量，点亮园子希望

刷新页面返回顶部

zhangshihai1232

antlr-代码分析(1)

grammar文件

tokens

LabeledExprLexer

现在看下识别器Recognizer

公告