/**
 * Line-oriented lexer that turns the characters supplied by a {@link Reader}
 * into a queue of {@code Token}s.
 *
 * <p>Input is read one line at a time. Each line is scanned with
 * {@link #regexPat}; comments and whitespace are discarded, every other match
 * becomes a {@code NumToken}, {@code StrToken} or {@code IdToken}, and one
 * {@code IdToken} carrying {@code Token.EOL} is queued at the end of each line.
 * Once the reader is exhausted, {@link #read()} and {@link #peek(int)} return
 * {@code Token.EOF}.
 */
public class Lexer {
    /**
     * Comment alternative, capture group 2 of {@link #regexPat}: {@code //}
     * followed by the rest of the line. Comments are never queued.
     */
    public static String annot = "(//.*)";

    /**
     * Number alternative, capture group 3 of {@link #regexPat}: one or more
     * ASCII digits. Only integers are recognised (no sign, no decimal point).
     */
    public static String num = "([0-9]+)";

    /**
     * String-literal alternative, capture group 4 of {@link #regexPat}.
     *
     * <p>Unescaped form of the expression: a double quote, then any number of
     * <ul>
     *   <li>backslash + double quote (an escaped quote),</li>
     *   <li>backslash + backslash (an escaped backslash),</li>
     *   <li>backslash + {@code n} (an escaped newline),</li>
     *   <li>{@code [^"]} (any character other than a double quote),</li>
     * </ul>
     * then a closing double quote. The inner repetition is capture group 5 and
     * is not consulted by this class.
     */
    public static String str = "(\"(\\\\\"|\\\\\\\\|\\\\n|[^\"])*\")";

    /**
     * Identifier / operator alternative (no capture group of its own): a name
     * matching {@code [A-Z_a-z][A-Z_a-z0-9]*}, one of the two-character
     * operators {@code ==}, {@code <=}, {@code >=}, {@code &&}, {@code ||},
     * or any single {@code \p{Punct}} character.
     */
    public static String id = "[A-Z_a-z][A-Z_a-z0-9]*|==|<=|>=|&&|\\|\\||\\p{Punct}";

    /**
     * Capture group 1: exactly one of comment, number, string or identifier.
     * The group is optional, so it is {@code null} when only whitespace matched.
     */
    public static String g1 = "(" + annot +"|"+ num + "|" + str + "|" + id + ")?";

    /**
     * Complete per-token pattern: {@code \s*} (zero or more whitespace
     * characters, skipped and never queued) followed by the optional group 1.
     * Because both parts are optional the pattern can match the empty string.
     */
    public static String regexPat = "\\s*" + g1 ;

    /** Compiled form of {@link #regexPat}, taken when this lexer is constructed. */
    private final Pattern pattern = Pattern.compile(regexPat);
    /** Tokens that have been scanned but not yet handed out by {@link #read()}. */
    private final List<Token> queue = new ArrayList<>();
    /** {@code false} once the underlying reader has reported end of input. */
    private boolean hasMore;
    /** Source of lines; also supplies the line number stored in each token. */
    private final LineNumberReader reader;

    /**
     * Creates a lexer over the given character source.
     *
     * @param r reader supplying the source text; it is wrapped in a
     *          {@link LineNumberReader}
     */
    public Lexer(Reader r) {
        hasMore = true;
        reader = new LineNumberReader(r);
    }

    /**
     * Removes and returns the next token.
     *
     * @return the next token, or {@code Token.EOF} when the input is exhausted
     * @throws ParseException if a line cannot be tokenized or cannot be read
     */
    public Token read() throws ParseException {
        if (fillQueue(0)) {
            return queue.remove(0);
        }
        return Token.EOF;
    }

    /**
     * Returns the token {@code i} positions ahead without consuming it.
     *
     * @param i look-ahead distance; {@code 0} is the token {@link #read()}
     *          would return next
     * @return the requested token, or {@code Token.EOF} when the input ends
     *         before that position
     * @throws ParseException if a line cannot be tokenized or cannot be read
     */
    public Token peek(int i) throws ParseException {
        if (fillQueue(i)) {
            return queue.get(i);
        }
        return Token.EOF;
    }

    /**
     * Reads further lines until the queue holds an element at index {@code i}.
     *
     * @param i queue index that must become available
     * @return {@code true} if index {@code i} is available, {@code false} if
     *         the input ended first
     * @throws ParseException propagated from {@link #readLine()}
     */
    private boolean fillQueue(int i) throws ParseException {
        while (i >= queue.size()) {
            if (!hasMore) {
                return false;
            }
            readLine();
        }
        return true;
    }

    /**
     * Reads one line from the reader and appends its tokens, followed by a
     * single end-of-line token, to the queue. Clears {@link #hasMore} when the
     * reader has no more lines.
     *
     * @throws ParseException if the reader fails (the {@link IOException} is
     *         attached as the cause) or if some position in the line does not
     *         start a token
     */
    private void readLine() throws ParseException {
        String line;
        try {
            line = reader.readLine();
        } catch (IOException e) {
            // Surface the failure instead of silently treating it as end of input.
            ParseException failure =
                    new ParseException("I/O error while reading source: " + e.getMessage(), 0);
            failure.initCause(e);
            throw failure;
        }
        if (line == null) {
            hasMore = false;
            return;
        }
        int lineNo = reader.getLineNumber();
        Matcher matcher = pattern.matcher(line);
        matcher.useTransparentBounds(false);
        int pos = 0;
        int endPos = line.length();
        while (pos < endPos) {
            matcher.region(pos, endPos);
            // The pattern can match the empty string; a match that consumes
            // nothing means the character at pos starts no token, and looping
            // on it would never terminate.
            if (!matcher.lookingAt() || matcher.end() == pos) {
                throw new ParseException("bad token at line " + lineNo, pos);
            }
            addToken(lineNo, matcher);
            pos = matcher.end();
        }
        // Exactly one end-of-line marker per input line, blank lines included.
        queue.add(new IdToken(lineNo, Token.EOL));
    }

    /**
     * Converts the current match into a token and queues it. Whitespace-only
     * matches and comments produce no token.
     *
     * @param lineNo  line number recorded in the token
     * @param matcher matcher positioned on a successful match of {@link #regexPat}
     * @throws ParseException if a number literal does not fit in an {@code int}
     */
    private void addToken(int lineNo, Matcher matcher) throws ParseException {
        String lexeme = matcher.group(1);
        if (lexeme == null) {
            return; // only whitespace was matched
        }
        if (matcher.group(2) != null) {
            return; // comment
        }
        Token token;
        if (matcher.group(3) != null) {
            try {
                token = new NumToken(lineNo, Integer.parseInt(lexeme));
            } catch (NumberFormatException e) {
                // A run of digits too large for int is a lexical error, not a crash.
                ParseException failure = new ParseException(
                        "bad number '" + lexeme + "' at line " + lineNo, matcher.start(1));
                failure.initCause(e);
                throw failure;
            }
        } else if (matcher.group(4) != null) {
            token = new StrToken(lineNo, toStringLiteral(lexeme));
        } else {
            token = new IdToken(lineNo, lexeme);
        }
        queue.add(token);
    }

    /**
     * Decodes a quoted string lexeme: strips the surrounding double quotes and
     * resolves the escapes backslash-quote, backslash-backslash and
     * backslash-n. Any other backslash is kept literally.
     *
     * @param s lexeme including its opening and closing double quote
     * @return the decoded text
     */
    private String toStringLiteral(String s) {
        StringBuilder sb = new StringBuilder();
        // Index of the closing quote; scanning stops before it.
        int len = s.length() - 1;
        for (int i = 1; i < len; i++) {
            char c = s.charAt(i);
            if (c == '\\' && i + 1 < len) {
                char c2 = s.charAt(i + 1);
                if (c2 == '"' || c2 == '\\') {
                    // Skip the backslash and keep the escaped character.
                    c = s.charAt(++i);
                } else if (c2 == 'n') {
                    ++i;
                    c = '\n';
                }
            }
            sb.append(c);
        }
        return sb.toString();
    }

    /** Token holding an integer literal. */
    static class NumToken extends Token {
        private final int value;

        /**
         * @param lineNo line number passed to the {@code Token} constructor
         * @param v      numeric value of the literal
         */
        public NumToken(int lineNo, int v) {
            super(lineNo);
            value = v;
        }

        /** @return always {@code true} */
        public boolean isNumber() {
            return true;
        }

        /** @return the value rendered in decimal */
        public String getText() {
            return Integer.toString(value);
        }

        /** @return the numeric value */
        public int getNumber() {
            return value;
        }
    }

    /**
     * Token holding an identifier, operator or punctuation character; also
     * used by the lexer for the end-of-line marker.
     */
    static class IdToken extends Token {
        private final String text;

        /**
         * @param i  line number passed to the {@code Token} constructor
         * @param id token text
         */
        public IdToken(int i, String id) {
            super(i);
            text = id;
        }

        /** @return always {@code true} */
        public boolean isIdentifier() {
            return true;
        }

        /** @return the token text */
        public String getText() {
            return text;
        }
    }

    /** Token holding the decoded contents of a string literal. */
    static class StrToken extends Token {
        private final String literal;

        /**
         * @param line line number passed to the {@code Token} constructor
         * @param text decoded contents of the literal
         */
        public StrToken(int line, String text) {
            super(line);
            // Store the argument itself, not the enclosing class's regex field.
            literal = text;
        }

        /**
         * Reports that this token is a string literal.
         * NOTE(review): presumably this matches a predicate declared on Token; confirm.
         *
         * @return always {@code true}
         */
        public boolean isString() {
            return true;
        }

        /**
         * Misspelt predecessor of {@link #isString()}, kept so existing callers
         * still compile.
         *
         * @return always {@code true}
         */
        public boolean isTring() {
            return isString();
        }

        /** @return the decoded contents of the literal */
        public String getText() {
            return literal;
        }
    }
}
}