// Lexical analyzer, version 1.0
package org.experience;
import java.io.*;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
/**
 * A small lexical analyzer for Java source text.
 *
 * <p>The pipeline used by {@link #main(String[])} is: read the source as lines,
 * strip comments ({@link #processSrc(List)}), split the remaining text into
 * tokens ({@link #processSrc2(List)}) and print each token with its category
 * ({@link #analysis(List)}).
 *
 * <p>Token categories, in the order they are tested: keyword, number, operator,
 * delimiter, identifier, and "other" for anything that matches none of them.
 */
public class WordAnalysis_Java {

    /** Source file analyzed by {@link #main(String[])} when no argument is given. */
    private static final String DEFAULT_SOURCE_FILE = "D:\\compileDoc\\WordAnalysis.java";

    /** Keyword table read by {@link #readKeyWord()}. */
    private static final String DEFAULT_KEYWORD_FILE = "D:\\compileDoc\\keyWord.txt";

    /** Length of the longest symbolic token in the operator table ({@code >>>=}). */
    private static final int MAX_SYMBOL_LENGTH = 4;

    private final HashSet<String> keyWord;
    private final HashSet<String> operator = operator();
    private final HashSet<String> delimiter = delimiter();

    /**
     * Creates an analyzer whose keyword table is loaded by {@link #readKeyWord()}.
     *
     * @throws Exception if the keyword file cannot be read
     */
    public WordAnalysis_Java() throws Exception {
        this(readKeyWord());
    }

    /**
     * Creates an analyzer with an explicit keyword table, so that no file has to
     * exist on disk.
     *
     * @param keyWord the words to report as keywords; copied, must not be null
     * @throws IllegalArgumentException if {@code keyWord} is null
     */
    public WordAnalysis_Java(HashSet<String> keyWord) {
        if (keyWord == null) {
            throw new IllegalArgumentException("keyWord must not be null");
        }
        this.keyWord = new HashSet<>(keyWord);
    }

    /**
     * Analyzes a source file and prints one line per token.
     *
     * @param args optional; {@code args[0]} overrides the default source file path
     * @throws Exception if the keyword file or the source file cannot be read
     */
    public static void main(String[] args) throws Exception {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_SOURCE_FILE;
        WordAnalysis_Java a = new WordAnalysis_Java();
        // Whole lines are passed on: a line comment can only be cut correctly
        // while the end of its line is still known.
        List<String> src = readLines(new File(path));
        List<String> word = a.processSrc(src);
        word = a.processSrc2(word);
        a.analysis(word);
    }

    /**
     * Prints every token together with its category, one token per line.
     * Null and empty entries are skipped.
     *
     * @param words the tokens to classify; a null list prints nothing
     */
    public void analysis(List<String> words) {
        if (words == null) {
            return;
        }
        for (String word : words) {
            if (word == null || word.isEmpty()) {
                continue;
            }
            System.out.println(label(word) + word);
        }
    }

    /** Returns the printed prefix for a token; the test order decides ties. */
    private String label(String word) {
        if (keyWord.contains(word)) {
            return "关键字:\t";
        }
        if (isDigit2(word)) {
            return "数字:\t";
        }
        if (operator.contains(word)) {
            return "运算符:\t";
        }
        if (delimiter.contains(word)) {
            return "分界符:\t";
        }
        if (isIdentifier(word)) {
            return "标识符:\t";
        }
        return "其他:\t";
    }

    /**
     * Tells whether a token is a non-empty run of the ASCII digits 0-9.
     *
     * @param str the token; null yields false
     * @return true if every character is an ASCII digit and there is at least one
     */
    public boolean isDigit2(String str) {
        if (str == null || str.isEmpty()) {
            return false;
        }
        for (int i = 0; i < str.length(); i++) {
            if (!isDigit(str.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether a token has the shape of a Java identifier: a start character
     * followed by any number of part characters, as defined by
     * {@link Character#isJavaIdentifierStart(char)} and
     * {@link Character#isJavaIdentifierPart(char)}. Keywords are not excluded here;
     * {@link #analysis(List)} tests for keywords first.
     *
     * @param str the token; null yields false
     * @return true if the token is shaped like an identifier
     */
    public boolean isIdentifier(String str) {
        if (str == null || str.isEmpty()) {
            return false;
        }
        if (!Character.isJavaIdentifierStart(str.charAt(0))) {
            return false;
        }
        for (int i = 1; i < str.length(); i++) {
            if (!Character.isJavaIdentifierPart(str.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Reads a source file and returns its whitespace-separated words in order.
     * Empty strings are never returned. Line boundaries are not preserved, so the
     * result is not suitable for removing line comments.
     *
     * @param file the source file
     * @return the words of the file
     * @throws Exception if the file cannot be read
     */
    public List<String> readSrc(File file) throws Exception {
        List<String> words = new ArrayList<>();
        for (String line : readLines(file)) {
            addWords(line, words);
        }
        return words;
    }

    /** Reads all lines of a file; the reader is closed even when reading fails. */
    private static List<String> readLines(File file) throws IOException {
        List<String> lines = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
        }
        return lines;
    }

    /** Appends the non-empty whitespace-separated words of {@code text} to {@code out}. */
    private static void addWords(String text, List<String> out) {
        for (String word : text.split("\\s+")) {
            if (!word.isEmpty()) {
                out.add(word);
            }
        }
    }

    /**
     * Splits comment-free source text into tokens.
     *
     * <p>Each element is scanned left to right. Whitespace separates tokens and is
     * dropped. A run of identifier characters (letters, digits, {@code _},
     * {@code $}) forms one token. A backslash and the character after it form one
     * token, so an escape such as {@code \t} stays together. Any other character
     * starts a symbol: the longest entry of the operator or delimiter table that
     * matches at that position is taken, or the single character if none matches.
     * Quote characters are emitted as separate delimiter tokens; the text between
     * them is tokenized like any other text.
     *
     * @param src pieces of source text; null entries are skipped
     * @return the tokens in order, never containing an empty string
     * @throws Exception never thrown by this implementation; kept for callers
     */
    public List<String> processSrc2(List<String> src) throws Exception {
        List<String> tokens = new ArrayList<>();
        if (src == null) {
            return tokens;
        }
        for (String piece : src) {
            if (piece != null) {
                scan(piece, tokens);
            }
        }
        return tokens;
    }

    /** Tokenizes one piece of text and appends the tokens to {@code out}. */
    private void scan(String text, List<String> out) {
        int n = text.length();
        int i = 0;
        while (i < n) {
            char c = text.charAt(i);
            if (Character.isWhitespace(c)) {
                i++;
            } else if (Character.isJavaIdentifierPart(c)) {
                int start = i;
                while (i < n && Character.isJavaIdentifierPart(text.charAt(i))) {
                    i++;
                }
                out.add(text.substring(start, i));
            } else if (c == '\\' && i + 1 < n) {
                out.add(text.substring(i, i + 2));
                i += 2;
            } else {
                int len = symbolLength(text, i);
                out.add(text.substring(i, i + len));
                i += len;
            }
        }
    }

    /**
     * Returns the length of the longest operator or delimiter that starts at
     * {@code from}, or 1 when no multi-character symbol matches there.
     */
    private int symbolLength(String text, int from) {
        int longest = Math.min(MAX_SYMBOL_LENGTH, text.length() - from);
        for (int len = longest; len > 1; len--) {
            String candidate = text.substring(from, from + len);
            if (operator.contains(candidate) || delimiter.contains(candidate)) {
                return len;
            }
        }
        return 1;
    }

    /**
     * Removes comments and blank entries from source text.
     *
     * <p>Every element of {@code src} is treated as one line: a line comment runs
     * from {@code //} to the end of the element, so callers should pass whole
     * lines. A block comment may span several elements and is replaced by a single
     * space; an unterminated block comment swallows the rest of the input. Comment
     * markers inside string or character literals are left alone; a literal is
     * assumed to end within the element it starts in.
     *
     * @param src the source, one line per element; null entries are skipped
     * @return the remaining whitespace-separated words, never containing an empty string
     * @throws Exception never thrown by this implementation; kept for callers
     */
    public List<String> processSrc(List<String> src) throws Exception {
        List<String> words = new ArrayList<>();
        if (src == null) {
            return words;
        }
        boolean inBlockComment = false;
        for (String line : src) {
            if (line == null) {
                continue;
            }
            StringBuilder code = new StringBuilder(line.length());
            int n = line.length();
            int i = 0;
            while (i < n) {
                char c = line.charAt(i);
                boolean slashPair = c == '/' && i + 1 < n;
                if (inBlockComment) {
                    int end = line.indexOf("*/", i);
                    if (end < 0) {
                        i = n;
                    } else {
                        inBlockComment = false;
                        // A comment separates tokens, so leave a space behind.
                        code.append(' ');
                        i = end + 2;
                    }
                } else if (slashPair && line.charAt(i + 1) == '/') {
                    break;
                } else if (slashPair && line.charAt(i + 1) == '*') {
                    inBlockComment = true;
                    i += 2;
                } else if (c == '"' || c == '\'') {
                    int end = endOfLiteral(line, i);
                    code.append(line, i, end);
                    i = end;
                } else {
                    code.append(c);
                    i++;
                }
            }
            addWords(code.toString(), words);
        }
        return words;
    }

    /**
     * Returns the index just past the literal whose opening quote is at
     * {@code start}, honoring backslash escapes; an unterminated literal ends at
     * the end of the line.
     */
    private static int endOfLiteral(String line, int start) {
        char quote = line.charAt(start);
        int n = line.length();
        int i = start + 1;
        while (i < n) {
            char c = line.charAt(i);
            if (c == '\\') {
                i += 2;
            } else if (c == quote) {
                return i + 1;
            } else {
                i++;
            }
        }
        return n;
    }

    /**
     * Tells whether a character is an ASCII digit.
     *
     * @param letter the character to test
     * @return true for '0' through '9'
     */
    public boolean isDigit(char letter) {
        return letter >= '0' && letter <= '9';
    }

    /**
     * Tells whether a character is an ASCII letter.
     *
     * @param letter the character to test
     * @return true for 'a'-'z' and 'A'-'Z'
     */
    public boolean isLetter(char letter) {
        return (letter >= 'a' && letter <= 'z') || (letter >= 'A' && letter <= 'Z');
    }

    /**
     * Tells whether a word is contained in a keyword table.
     *
     * @param word    the word to look up
     * @param keyWord the keyword table; null yields false
     * @return true if the table contains the word
     */
    public boolean isKeyWord(String word, HashSet<String> keyWord) {
        return keyWord != null && keyWord.contains(word);
    }

    /**
     * Builds the keyword table from the default keyword file.
     *
     * @return the keywords found in the file
     * @throws Exception if the file cannot be read
     */
    public static HashSet<String> readKeyWord() throws Exception {
        return readKeyWord(new File(DEFAULT_KEYWORD_FILE));
    }

    /**
     * Builds a keyword table from a file of whitespace-separated words. Blank
     * lines and repeated separators do not produce an empty keyword.
     *
     * @param file the keyword file
     * @return the keywords found in the file
     * @throws Exception if the file cannot be read
     */
    public static HashSet<String> readKeyWord(File file) throws Exception {
        HashSet<String> keyWord = new HashSet<>();
        for (String line : readLines(file)) {
            for (String word : line.split("\\s+")) {
                if (!word.isEmpty()) {
                    keyWord.add(word);
                }
            }
        }
        return keyWord;
    }

    /**
     * Builds the delimiter table: the bracket pairs, {@code ;}, {@code .},
     * {@code ,}, {@code @} and the two quote characters.
     *
     * @return a new mutable set of delimiters
     */
    public static HashSet<String> delimiter() {
        String[] symbols = {
            ";", ".", "{", "}", ")", "(", "[", "]", "\"", "\'",
            ",", "@"
        };
        HashSet<String> hashSet = new HashSet<>();
        for (String symbol : symbols) {
            hashSet.add(symbol);
        }
        return hashSet;
    }

    /**
     * Builds the operator table, including the word operator {@code instanceof}.
     *
     * @return a new mutable set of operators
     */
    public static HashSet<String> operator() {
        String[] symbols = {
            "+", "-", "*", "/", "%", "++", "--",
            "==", "!=", ">", "<", ">=", "<=",
            "&", "&&", "|", "^", "~", "<<", ">>", ">>>", "||", "!",
            "+=", "-=", "%=", "/=", "*=", "=",
            "<<=", ">>=", ">>>=", "&=", "^=", "|=",
            "?", ":", "?:",
            "instanceof"
        };
        HashSet<String> hashSet = new HashSet<>();
        for (String symbol : symbols) {
            hashSet.add(symbol);
        }
        return hashSet;
    }
}