Java转义字符详解
转义字符 | 缩写/字符 | 解释 | Bin | Oct | Dec | Hex |
---|---|---|---|---|---|---|
\a | BEL (bell) | 响铃 | 0000 0111 | 07 | 7 | 0x07 |
\b | BS (backspace) | 退格 | 0000 1000 | 010 | 8 | 0x08 |
\f | FF (NP form feed, new page) | 换页键 | 0000 1100 | 014 | 12 | 0x0C |
\n | LF (NL line feed, new line) | 换行键 | 0000 1010 | 012 | 10 | 0x0A |
\r | CR (carriage return) | 回车键 | 0000 1101 | 015 | 13 | 0x0D |
\t | HT (horizontal tab) | 水平制表符 | 0000 1001 | 011 | 9 | 0x09 |
\v | VT (vertical tab) | 垂直制表符 | 0000 1011 | 013 | 11 | 0x0B |
\\ | \ | 反斜杠 | 0101 1100 | 0134 | 92 | 0x5C |
\' | ' | 单引号 | 0010 0111 | 047 | 39 | 0x27 |
\" | " | 双引号 | 0010 0010 | 042 | 34 | 0x22 |
\ddd | ASCII code | ASCII码 | | 0 - 0177 | | |
\udddd | Unicode | Unicode码 | | | | 0x0000 - 0xFFFF |
\udddd\udddd | Unicode | Unicode代理对 | | | | 0xD800 - 0xDB7F High Surrogates, 0xDC00 - 0xDFFF Low Surrogates |
常用转义字符码表
Javac
调用链
在 openjdk-8u41 中很容易找到 Compiler 的一条调用链:
javac
->com.sun.tools.javac.main.Main
->JavaCompiler
->JavacParser
->JavaTokenizer
这就是老生常谈的流程:词法分析器(Lexical Analyzer)将输入流转换为标记序列(Token Sequence),然后交由语法分析器(Parser)转换为抽象语法树(Abstract Syntax Tree)。
在词法分析阶段,字符/字符串字面量(Character or String Literal)会被转换。
nextToken
JavaTokenizer.readToken() 可以看作转换字面量的入口。
/*
* The example is pruned from the method 'readToken' in the
* 'JavaTokenizer' class from the 'com.sun.tools.javac.parser'
* package in the langtools module from the 'openjdk8' project.
*/
/**
 * Scans the next token from {@code reader} and returns it as a {@code Token}.
 *
 * This is a pruned excerpt: only the character-literal and string-literal
 * branches are shown in full. The other branches are represented by the
 * placeholder comments on the remaining {@code case}/{@code default} labels.
 *
 * @return a {@code Token}, {@code NamedToken}, {@code StringToken} or
 *         {@code NumericToken}, chosen by {@code tk.tag}
 * @throws AssertionError if {@code tk.tag} is none of DEFAULT, NAMED,
 *         STRING or NUMERIC
 */
public Token readToken() {
// Reset the fields that this method fills in while scanning one token.
reader.sp = 0;
name = null;
radix = 0;
int pos = 0;
int endPos = 0;
List<Comment> comments = null;
try {
loop: while (true) {
// Remember where this iteration starts; used as the token start position
// and as the position passed to lexError / scanLitChar below.
pos = reader.bp;
switch (reader.ch) {
case /* (space) \ HT \ FF \ LF \ CR */
case /* A-Z \ a-z \ $ \ _ */
case /* 0 */
case /* 1-9 */
case /* . */
case /* , \ ; \ ( \ ) \ [ \ ] \ { \ } \ Comments */
// Character literal: opening quote, one literal character, closing quote.
case '\'':
// presumably advances past the opening quote -- scanChar is not shown here
reader.scanChar();
if (reader.ch == '\'') {
// Two quotes in a row: '' is not a valid character literal.
lexError(pos, "empty.char.lit");
} else {
// A raw line terminator inside the literal is reported, but scanning
// continues: the 'if' has no braces, so scanLitChar runs either way.
if (reader.ch == CR || reader.ch == LF)
lexError(pos, "illegal.line.end.in.char.lit");
// Consumes one literal character; this is where escape sequences are
// handled (see scanLitChar).
scanLitChar(pos);
// ch2 is assigned but not read anywhere in this excerpt.
char ch2 = reader.ch;
if (reader.ch == '\'') {
reader.scanChar();
tk = TokenKind.CHARLITERAL;
} else {
lexError(pos, "unclosed.char.lit");
}
}
break loop;
// String literal: literal characters up to the closing double quote.
case '\"':
reader.scanChar();
// Keep consuming literal characters until a closing quote, a line
// terminator (CR/LF), or reader.bp reaching reader.buflen.
while (reader.ch != '\"' && reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen)
scanLitChar(pos);
if (reader.ch == '\"') {
tk = TokenKind.STRINGLITERAL;
reader.scanChar();
} else {
// The loop stopped on something other than the closing quote.
lexError(pos, "unclosed.str.lit");
}
break loop;
default /* Operator \ Identifier \ EOF \ Illegal */
}
}
// NOTE(review): on the lexError paths above tk is not assigned in this
// excerpt; presumably lexError sets it -- confirm against the full source.
endPos = reader.bp;
// Build the concrete Token subclass that matches the token kind's tag.
switch (tk.tag) {
case DEFAULT: return new Token(tk, pos, endPos, comments);
case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
case STRING: return new StringToken(tk, pos, endPos, reader.chars(), comments);
case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
default: throw new AssertionError();
}
}
finally /* Println Debug */
}