Trie字典树算法（转）

特性

Trie树属于树形结构，查询效率比红黑树和哈希表都要快。假设有这么一种应用场景：有若干个英文单词，需要快速查找某个单词是否存在于字典中。使用Trie时先从根节点开始查找，直至匹配到给出字符串的最后一个节点。在建立字典树结构时，预先把带有相同前缀的单词合并在同一节点，直至两个单词的某一个字母不同，则再从发生差异的节点中分叉一个子节点。

节点结构： 每个节点包含一个子节点指针数组，数组长度等于可储存的字符种类数。假设字典只存26个小写英文字母，那么每个节点下应该有一个长度为26的数组。换言之，可存的元素类型越多，单个节点占用内存越大。如果用字典树储存汉字，那么每个节点必须为数千个常用汉字开辟一个数组作为储存空间，占用的内存会高出几个数量级。不过Trie树就是一种用空间换时间的数据结构，鱼和熊掌往往不可兼得。

建树细节：

取要插入字符串的首个字符，从根节点的孩子节点开始，匹配当前字符是否已有节点，有则把指针指向该节点。无则为该字符创建节点，并把指针指向该新建节点。
迭代。
遇到要插入字符串末尾结束符时停止迭代，并把最后一个非'\0'字符对应的节点设为末端节点。

查找细节： 循环取要查找字符串的下一个字符，从根节点的孩子节点开始，匹配当前字符是否已有节点，有则继续循环，无则返回false。直至匹配完最后一个字符，再检查所到达的节点是否为末端节点，是则查找成功。

树结构图： 我们用apps, apply, apple, append, back, basic, backen几个英文单词创建树形结构

上图很容易看出，有相同前缀的英文单词，会合并在同一个节点，Trie树顺着一个个节点进行检索，直至找到最后一个节点。代码如下：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
 
// A single trie node: one child slot per lowercase letter plus bookkeeping.
struct trie_node
{
    // Number of child slots in every node (one per letter 'a'..'z').
    static const int letter_count = 26;

    int count;                        // starts at 1; class trie bumps it when insert() revisits the node
    bool is_terminal;                 // true when a stored word ends at this node
    char letter;                      // character this node stands for (0 until assigned)
    trie_node* childs[letter_count];  // child for letter c lives at childs[c - 'a']

    // Builds a leaf: count 1, not terminal, no letter, every child slot empty.
    trie_node()
        : count(1), is_terminal(false), letter(0)
    {
        trie_node** slot = childs;
        trie_node** const last = childs + letter_count;
        while (slot != last)
            *slot++ = NULL;
    }
};
 
class trie
{
public:
    trie()
        : root_node_(NULL)
    {
    }
 
    ~trie()
    {
        delete_trie(root_node_);
    }
 
public:
    trie_node* create()
    {
        trie_node* n = new trie_node();
        return n;
    }
 
    void insert(const char* str)
    {
        if (!root_node_ || !str)
            root_node_ = create();
 
        trie_node* next_element_node = root_node_;
        while (*str != 0)
        {
            char element_index = *str - 'a';
            if (!next_element_node->childs[element_index])
            {
                next_element_node->childs[element_index] = create();
            }
            else
            {
                next_element_node->childs[element_index]->count++;
            }
 
            next_element_node = next_element_node->childs[element_index];
            next_element_node->letter = *str;
            str++;
        }
 
        next_element_node->is_terminal = true;
    }
 
    bool find_word_exists(const char* str)
    {
        if (!root_node_ || !str)
            return NULL;
 
        trie_node* element_node = root_node_;
        do
        {
            element_node = element_node->childs[*str - 'a'];
            if (!element_node) return false;
            str++;
        } while (*str != 0);
 
        return element_node->is_terminal;
    }
 
    void delete_trie(trie_node* node)
    {
        if (!node) return;
        for(int i = 0; i < trie_node::letter_count; i++)
        {
            if(node->childs[i] != NULL)
                delete_trie(node->childs[i]);
        }
 
        delete node;
    }
 
private:
    trie_node* root_node_;
};

#define MAX 26    // number of child slots per node: one for each of the 26 letters
#define SLEN 100   // size of the string buffer stored in each node
// Trie node definition.
struct Trie
{
    struct Trie *next[MAX]; // child pointers, indexed by (character - 'a'); NULL when absent
    char s[SLEN];      // string stored at this node
    int isword;         // non-zero when this node holds a word
    char val;           // character this node represents
} *root; // file-scope root used by insert() and find(); the visible code never assigns it, so the caller presumably does `root = init();`
// Allocate and initialise an empty trie node, suitable for use as a root.
//
// Returns a heap-allocated node with every child pointer NULL, isword and
// val set to 0 and an empty stored string, or NULL if allocation fails.
// The caller owns the returned node.
struct Trie *init(void)
{
    struct Trie *node = malloc(sizeof *node);
    int i;

    if (node == NULL)
        return NULL;

    for (i = 0; i < MAX; i++)
    {
        node -> next[i] = NULL;
    }
    node -> isword = 0;
    node -> val = 0;
    node -> s[0] = '\0';   // keep the buffer a valid (empty) C string
    return node;
}
// Insert string s at the node reached by following path from the global root.
//
// path: NUL-terminated key made only of the letters 'a'..'z'; each letter
//       selects one child slot. Missing nodes along the way are created.
// s:    NUL-terminated string stored at the final node. It is truncated to
//       SLEN - 1 characters if longer, so it always fits the node's buffer.
//
// The call does nothing when root, path or s is NULL, or when path contains
// a character outside 'a'..'z'. If a node allocation fails part-way, the
// function returns without storing s; nodes created so far stay in the trie
// as plain (non-word) nodes.
void insert(char path[], char s[])
{
    struct Trie *t, *p = root;
    size_t i;
    int j, slot;

    if (p == NULL || path == NULL || s == NULL)
        return;

    // Validate the whole key first so a bad key never leaves a partial branch
    // and never indexes next[] out of bounds.
    for (i = 0; path[i] != '\0'; i++)
    {
        if (path[i] < 'a' || path[i] > 'z')
            return;
    }

    for (i = 0; path[i] != '\0'; i++)
    {
        slot = path[i] - 'a';
        if (p -> next[slot] == NULL)
        {
            t = malloc(sizeof *t);
            if (t == NULL)
                return;
            for (j = 0; j < MAX; j++)
            {
                t -> next[j] = NULL;
            }
            t -> isword = 0;
            t -> s[0] = '\0';
            t -> val = path[i];
            p -> next[slot] = t;
        }
        p = p -> next[slot];
    }
    p -> isword = 1;
    // Bounded copy: snprintf always NUL-terminates and never writes past s[].
    snprintf(p -> s, sizeof(p -> s), "%s", s);
}
// Look up the node reached by following path from the global root.
//
// path:    NUL-terminated key made only of the letters 'a'..'z'.
// delflag: value written to the node's isword flag on a hit. Pass a non-zero
//          value to keep the entry; pass 0 to mark it as no longer a word
//          (the node itself is not freed).
//
// Returns a pointer to the string stored in the node (owned by the trie, do
// not free it) when the node exists and is flagged as a word. Returns NULL
// when path is NULL, contains a character outside 'a'..'z', or does not lead
// to a word.
char *find(char path[], int delflag)
{
    struct Trie *p = root;
    size_t i;

    if (path == NULL)
        return NULL;

    for (i = 0; p != NULL && path[i] != '\0'; i++)
    {
        // Reject characters that would index next[] out of bounds.
        if (path[i] < 'a' || path[i] > 'z')
            return NULL;
        p = p -> next[path[i] - 'a'];
    }
    if (p && p -> isword)
    {
        p -> isword = delflag;
        return p -> s;
    }
    return NULL;
}
// Free every node below root.
//
// All descendants of root are released and the corresponding child pointers
// are reset to NULL, so calling del() again on the same node, or continuing
// to use it, is safe. The node passed in is NOT freed: the caller still owns
// it and is responsible for releasing it. A NULL argument is a no-op.
void del(struct Trie *root)
{
    int i;
    if (!root)
        return;
    for (i = 0; i < MAX; i++)
    {
        del(root->next[i]);       // no-op for an empty slot
        free(root->next[i]);      // free(NULL) is harmless
        root->next[i] = NULL;     // do not leave a dangling pointer behind
    }
}

posted @ 2014-07-15 07:12 SUNFC 阅读(87) 评论(0) 收藏举报

刷新页面返回顶部

SUNFC

Trie字典树算法（转）

特性

公告