import java.util.TreeMap;
/**
 * A trie (prefix tree) keyed by {@code char}, storing a set of strings.
 *
 * <p>Each node keeps its children in a {@link TreeMap}, so children are
 * visited in ascending character order. The empty string is a legal word
 * and is represented by the root node itself.
 */
public class Trie {

    /**
     * A single trie node.
     *
     * <p>Declared {@code static} because a node never needs the enclosing
     * {@code Trie} instance; this avoids one hidden reference per node.
     */
    private static class Node {

        /** True when the path from the root to this node spells a stored word. */
        public boolean isWord;

        /** Children of this node, keyed by the next character. */
        public TreeMap<Character, Node> next;

        /**
         * Creates a node with no children.
         *
         * @param isWord whether the path ending at this node is a stored word
         */
        public Node(boolean isWord) {
            this.isWord = isWord;
            next = new TreeMap<>();
        }

        /** Creates a non-word node with no children. */
        public Node() {
            this(false);
        }
    }

    /** Root node; corresponds to the empty prefix. */
    public Node root;

    /** Number of distinct words stored. */
    private int size;

    /** Creates an empty trie. */
    public Trie() {
        root = new Node();
        size = 0;
    }

    /**
     * Returns the number of distinct words stored in the trie.
     *
     * @return the word count
     */
    public int getSize() {
        return size;
    }

    /**
     * Adds {@code word} to the trie. Adding a word that is already present
     * leaves the size unchanged.
     *
     * @param word the word to add
     */
    public void add(String word) {
        Node cur = root;
        for (int i = 0; i < word.length(); i++) {
            char c = word.charAt(i);
            // Look the child up once and reuse the result.
            Node child = cur.next.get(c);
            if (child == null) {
                child = new Node();
                cur.next.put(c, child);
            }
            cur = child;
        }
        // Only count the word the first time it is marked.
        if (!cur.isWord) {
            cur.isWord = true;
            size++;
        }
    }

    /**
     * Reports whether {@code word} is stored in the trie.
     *
     * @param word the word to look up
     * @return {@code true} if the exact word was previously added
     */
    public boolean contains(String word) {
        Node end = find(word);
        return end != null && end.isWord;
    }

    /**
     * Reports whether the trie contains a path spelling {@code prefix}.
     * The empty prefix always matches.
     *
     * @param prefix the prefix to look up
     * @return {@code true} if every character of {@code prefix} can be
     *         followed from the root
     */
    public boolean isPrefix(String prefix) {
        return find(prefix) != null;
    }

    /**
     * Matches {@code word}, starting at position {@code index}, against the
     * subtree rooted at {@code node}. A {@code '.'} in {@code word} matches
     * any single character.
     *
     * @param node  the node to start matching from
     * @param word  the pattern; {@code '.'} is a single-character wildcard
     * @param index the position in {@code word} to start from
     * @return {@code true} if some path from {@code node} matches the rest of
     *         {@code word} and ends on a node marked as a word
     */
    public boolean match(Node node, String word, int index) {
        if (index == word.length()) {
            return node.isWord;
        }

        char c = word.charAt(index);
        if (c != '.') {
            Node child = node.next.get(c);
            return child != null && match(child, word, index + 1);
        }

        // Wildcard: try every child, in ascending key order.
        for (Node child : node.next.values()) {
            if (match(child, word, index + 1)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Follows the characters of {@code s} from the root.
     *
     * @return the node reached after consuming all of {@code s}, or
     *         {@code null} if the path does not exist
     */
    private Node find(String s) {
        Node cur = root;
        for (int i = 0; i < s.length(); i++) {
            cur = cur.next.get(s.charAt(i));
            if (cur == null) {
                return null;
            }
        }
        return cur;
    }
}
import java.util.ArrayList;
/**
 * Small benchmark driver: loads the words of a text file, inserts them all
 * into a {@code Trie}, looks each one up again, and prints the number of
 * distinct words together with the elapsed time.
 */
public class Main {

    /**
     * Runs the benchmark against {@code pride-and-prejudice.txt}.
     * Nothing beyond the title line is printed if the file cannot be read.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        System.out.println("Pride and Prejudice");

        ArrayList<String> tokens = new ArrayList<>();
        boolean loaded = FileOperation.readFile("pride-and-prejudice.txt", tokens);
        if (!loaded) {
            return;
        }

        long begin = System.nanoTime();

        Trie trie = new Trie();
        int count = tokens.size();
        for (int i = 0; i < count; i++) {
            trie.add(tokens.get(i));
        }
        for (int i = 0; i < count; i++) {
            trie.contains(tokens.get(i));
        }

        long elapsedNanos = System.nanoTime() - begin;

        // Convert nanoseconds to seconds.
        double time = elapsedNanos / 1000000000.0;

        System.out.println("Total different words: " + trie.getSize());
        System.out.println("Trie: " + time + " s");
    }
}
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Locale;
import java.util.Scanner;
/**
 * File helpers: reads a text file and splits it into lower-cased words.
 */
public class FileOperation {

    /**
     * Reads the file named {@code filename} and appends every word found in
     * it to {@code words}.
     *
     * <p>A word is a maximal run of characters that starts at a letter and
     * continues while {@link Character#isLetter(char)} holds; each word is
     * lower-cased before being stored. This tokenisation is deliberately
     * simplistic and intended for demo purposes only.
     *
     * @param filename path of the file to read
     * @param words    list that receives the words, in file order
     * @return {@code false} if either argument is {@code null}, the file does
     *         not exist, or it cannot be opened; {@code true} otherwise
     *         (including for an empty file, which adds no words)
     */
    public static boolean readFile(String filename, ArrayList<String> words) {
        if (filename == null || words == null) {
            System.out.println("filename is null or words is null");
            return false;
        }

        File file = new File(filename);
        if (!file.exists()) {
            return false;
        }

        String contents;
        // try-with-resources so the stream and scanner are always closed.
        try (FileInputStream fis = new FileInputStream(file);
             Scanner scanner = new Scanner(new BufferedInputStream(fis), "UTF-8")) {
            scanner.useLocale(Locale.ENGLISH);
            if (!scanner.hasNextLine()) {
                // Empty file: nothing to tokenise.
                return true;
            }
            // "\\A" matches only at the start of input, so next() returns the whole file.
            contents = scanner.useDelimiter("\\A").next();
        } catch (IOException ioe) {
            System.out.println("Cannot open " + filename);
            return false;
        }

        collectWords(contents, words);
        return true;
    }

    /** Splits {@code contents} into lower-cased words and appends them to {@code words}. */
    private static void collectWords(String contents, ArrayList<String> words) {
        int start = firstCharacterIndex(contents, 0);
        int i = start + 1;
        while (i <= contents.length()) {
            if (i == contents.length() || !Character.isLetter(contents.charAt(i))) {
                // Fixed locale so the result does not depend on the platform default.
                words.add(contents.substring(start, i).toLowerCase(Locale.ENGLISH));
                start = firstCharacterIndex(contents, i);
                i = start + 1;
            } else {
                i++;
            }
        }
    }

    /**
     * Returns the index of the first letter in {@code s} at or after
     * {@code start}, or {@code s.length()} if there is none.
     */
    private static int firstCharacterIndex(String s, int start) {
        for (int i = start; i < s.length(); i++) {
            if (Character.isLetter(s.charAt(i))) {
                return i;
            }
        }
        return s.length();
    }
}