Trie

import java.util.TreeMap;

/**
 * A trie (prefix tree) holding a set of strings, one character per edge.
 *
 * <p>Children of each node are kept in a {@link TreeMap} keyed by character.
 * The empty string is a legal word: adding it marks the root as a word end.
 */
public class Trie {

	/**
	 * A single trie node: the outgoing edges plus a flag telling whether the
	 * path from the root to this node spells a stored word.
	 *
	 * <p>Declared static so that nodes do not each carry a hidden reference
	 * to the enclosing Trie instance.
	 */
	private static class Node {

		public boolean isWord;
		public TreeMap<Character, Node> next;

		public Node(boolean isWord) {
			this.isWord = isWord;
			next = new TreeMap<>();
		}

		public Node() {
			this(false);
		}
	}

	/** Root node; it corresponds to the empty prefix. */
	public Node root;
	/** Number of distinct words stored. */
	private int size;

	/** Creates an empty trie. */
	public Trie() {
		root = new Node();
		size = 0;
	}

	/**
	 * Returns the number of distinct words stored in this trie.
	 *
	 * @return the word count
	 */
	public int getSize() {
		return size;
	}

	/**
	 * Adds {@code word} to the trie. Adding a word that is already present
	 * leaves the trie and its size unchanged.
	 *
	 * @param word the word to store
	 */
	public void add(String word) {

		Node cur = root;
		for(int i = 0; i < word.length(); ++ i) {
			char c = word.charAt(i);
			// Look the child up once and create it on demand.
			Node child = cur.next.get(c);
			if(child == null) {
				child = new Node();
				cur.next.put(c, child);
			}
			cur = child;
		}

		// Only count the word the first time its end node is marked.
		if(!cur.isWord) {
			cur.isWord = true;
			size ++;
		}
	}

	/**
	 * Tells whether {@code word} was added to this trie as a complete word.
	 *
	 * @param word the word to look up
	 * @return {@code true} if the exact word is stored
	 */
	public boolean contains(String word) {
		Node end = walk(word);
		return end != null && end.isWord;
	}

	/**
	 * Tells whether the path spelled by {@code prefix} exists in this trie.
	 * The empty prefix always matches, because it ends at the root.
	 *
	 * @param prefix the prefix to look up
	 * @return {@code true} if every character of the prefix can be followed
	 */
	public boolean isPrefix(String prefix) {
		return walk(prefix) != null;
	}

	/**
	 * Tells whether some stored word matches {@code word}, where each
	 * {@code '.'} in the pattern stands for exactly one arbitrary character.
	 *
	 * @param word the pattern to match against the whole trie
	 * @return {@code true} if at least one stored word matches
	 */
	public boolean match(String word) {
		return match(root, word, 0);
	}

	/**
	 * Matches the tail of a pattern, starting at {@code index}, against the
	 * subtree rooted at {@code node}. A {@code '.'} in the pattern stands for
	 * exactly one arbitrary character.
	 *
	 * @param node  the node to start matching from
	 * @param word  the pattern
	 * @param index position in {@code word} of the first character to match
	 * @return {@code true} if some word end is reached exactly when the
	 *         pattern is exhausted
	 */
	public boolean match(Node node, String word, int index) {

		if(index == word.length()) {
			return node.isWord;
		}

		char c = word.charAt(index);
		if(c != '.') {
			Node child = node.next.get(c);
			return child != null && match(child, word, index + 1);
		}

		// Wildcard: succeed as soon as any child subtree matches the rest.
		for(Node child : node.next.values()) {
			if(match(child, word, index + 1)) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Follows the characters of {@code s} down from the root.
	 *
	 * @return the node reached after the last character, or {@code null} if
	 *         the path breaks off before the string is consumed
	 */
	private Node walk(String s) {
		Node cur = root;
		for(int i = 0; i < s.length(); ++ i) {
			cur = cur.next.get(s.charAt(i));
			if(cur == null) {
				return null;
			}
		}
		return cur;
	}
}

import java.util.ArrayList;

/**
 * Small timing driver: loads the words of a text file, inserts all of them
 * into a {@code Trie}, looks each one up again, and prints the number of
 * distinct words together with the elapsed time.
 */
public class Main {

	/**
	 * Runs the benchmark. Nothing beyond the title line is printed when the
	 * input file cannot be read.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {

		System.out.println("Pride and Prejudice");

		ArrayList<String> tokens = new ArrayList<>();
		boolean loaded = FileOperation.readFile("pride-and-prejudice.txt", tokens);
		if(!loaded) {
			return;
		}

		// The clock covers building the trie and querying every word once.
		long begin = System.nanoTime();

		Trie dictionary = new Trie();
		for(int i = 0; i < tokens.size(); i ++) {
			dictionary.add(tokens.get(i));
		}

		for(int i = 0; i < tokens.size(); i ++) {
			dictionary.contains(tokens.get(i));
		}

		long elapsedNanos = System.nanoTime() - begin;
		// Convert nanoseconds to seconds.
		double time = elapsedNanos / 1000000000.0;

		System.out.println("Total different words: " + dictionary.getSize());
		System.out.println("Trie: " + time + " s");
	}

}

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Locale;
import java.util.Scanner;

/**
 * File helper that reads a text file and splits it into lowercase words.
 */
public class FileOperation {

    /**
     * Reads the file named {@code filename} as UTF-8 and appends every word
     * found in it to {@code words}, lowercased.
     *
     * <p>A word is a maximal run of characters for which
     * {@link Character#isLetter(char)} is true. This tokenization is
     * deliberately simple and intended for demo purposes only.
     *
     * @param filename path of the file to read
     * @param words    list that receives the words, in file order
     * @return {@code false} if an argument is {@code null}, the file does not
     *         exist, or it cannot be opened; {@code true} otherwise (an
     *         empty file yields {@code true} and adds nothing)
     */
    public static boolean readFile(String filename, ArrayList<String> words){

        if (filename == null || words == null){
            System.out.println("filename is null or words is null");
            return false;
        }

        File file = new File(filename);
        if (!file.exists()) {
            return false;
        }

        String contents;
        // try-with-resources closes the scanner, and through it the
        // underlying file stream, on every exit path.
        try (Scanner scanner = new Scanner(
                new BufferedInputStream(new FileInputStream(file)), "UTF-8")) {
            scanner.useLocale(Locale.ENGLISH);
            if (!scanner.hasNextLine()) {
                return true;
            }
            // "\\A" matches only at the start of input, so the first token
            // is the entire file content.
            contents = scanner.useDelimiter("\\A").next();
        }
        catch(IOException ioe){
            System.out.println("Cannot open " + filename);
            return false;
        }

        collectWords(contents, words);
        return true;
    }

    // Appends each maximal run of letters in contents to words, lowercased.
    private static void collectWords(String contents, ArrayList<String> words){

        int start = firstCharacterIndex(contents, 0);
        while (start < contents.length()) {
            int end = start + 1;
            while (end < contents.length() && Character.isLetter(contents.charAt(end))) {
                end++;
            }
            // A fixed locale keeps the result independent of the JVM default
            // (e.g. the dotless-i mapping in Turkish locales).
            words.add(contents.substring(start, end).toLowerCase(Locale.ENGLISH));
            start = firstCharacterIndex(contents, end);
        }
    }

    // Returns the index of the first letter in s at or after start,
    // or s.length() if there is none.
    private static int firstCharacterIndex(String s, int start){

        for( int i = start ; i < s.length() ; i ++ )
            if( Character.isLetter(s.charAt(i)) )
                return i;
        return s.length();
    }
}

posted @ 2019-05-17 20:31 青衫客36 阅读(165) 评论(0) 收藏举报

刷新页面返回顶部

青衫客36

心之所向, 素履以往, 生如逆旅, 一苇以航

Trie

公告