学习hashtable

还是直接上代码吧:

  #include <stdlib.h>
  #include <cstring>
  #include <fstream>
  #include <iostream>
  #include <list>
  #include <string>
  #include <vector>
  7 
  8 using namespace std;
  9 
 // Number of buckets in the table; hash1() always returns a value below this.
 static const int hashtable_length = 6151;
 // Modulus applied to the string length by hash2().
 static const int hashtable_compare = 769;

 // Primary hash: selects the bucket for a string.
 //
 // Adds up the string one machine word (sizeof(unsigned int) bytes) at a
 // time, reducing modulo hashtable_length after every addition.  Trailing
 // bytes that do not fill a whole word are not hashed, so every string
 // shorter than one word hashes to 0.
 //
 // str: NUL-terminated string; must not be null.
 // Returns a value in [0, hashtable_length).
 unsigned int hash1(const char* str)
 {
     const std::size_t word_size = sizeof(unsigned int);
     std::size_t remaining = std::strlen(str);
     unsigned int sum = 0;
     while (remaining >= word_size)
     {
         // Copy the bytes out instead of casting str to unsigned int*: str
         // carries no alignment guarantee, and reading char storage through
         // an unsigned int pointer is undefined behaviour.
         unsigned int word = 0;
         std::memcpy(&word, str, word_size);
         sum = (sum + word) % hashtable_length;
         str += word_size;
         remaining -= word_size;
     }
     return sum;
 }
 25 
 26 // 用于快速比较两个字符串,理论上会提高检索速度
 27 // 用长度来判断是一种简单的方法,有时间可以设计更复杂的方法
 28 unsigned int hash2(const char* str)
 29 {
 30     return strlen(str)%hashtable_compare;    
 31 }
 // Reports whether `str` is already stored in bucket `l`.
 //
 // l:   one bucket of the table, scanned front to back.
 // str: NUL-terminated string to look for; must not be null.
 // Returns true if some element compares equal to `str`, false otherwise.
 bool find_in_bucket(std::list<std::string>& l, const char* str)
 {
     for (const std::string& entry : l)
     {
         // strcmp alone decides the result and stops at the first differing
         // byte.  A length-based pre-check would have to walk the whole entry
         // first, so it could only add work, never save any.
         if (std::strcmp(str, entry.c_str()) == 0)
             return true;
     }
     return false;
 }
 48 int insert_in_bucket(list<string>& l, const char* str)
 49 {
 50     if (!find_in_bucket(l, str))
 51     {
 52         l.push_back(string(str));
 53         return l.size();
 54     }else
 55         return -1;
 56 }
 57 bool find_in_hashtable(vector<list<string>>& v, const char* str)
 58 {
 59     return find_in_bucket(v[hash1(str)], str);
 60 }
 61 int insert_in_hashtable(vector<list<string>>& v, const char* str)
 62 {
 63     return insert_in_bucket(v[hash1(str)], str);
 64 }
 65 
 66 int main()
 67 {
 68     // 学习list
 69     // 学习hashtable
 70 
 71     vector<list<string>> hashtable(hashtable_length, list<string>());
 72     
 73     ofstream log_file;
 74     log_file.open("D:\\log.txt");
 75     ifstream input_file;
 76     input_file.open("D:\\input.txt");
 77     if(!input_file)
 78         return -1;
 79 
 80     // 10KB的缓冲区。注意!如果缓冲区过大,会导致栈空间不足,在函数层层调用的时候,会产生 stack overflow 错误!
 81     char buff[10240] = {0};    
 82     int max_of_bucket = -1;
 83     while (input_file.getline(buff, 10240-1))
 84     {
 85         // 使用哈希表,存储海量数据,能够快速查找和插入
 86         int len = insert_in_hashtable(hashtable, buff);
 87         if (len > max_of_bucket) 
 88             max_of_bucket = len;
 89         log_file<< "hashkey\t= \t" << hash1(buff) << "\nlength\t=\t"<< len << "\n" << endl;
 90     }
 91     log_file<< "max_of_bucket = " << max_of_bucket << endl;
 92     input_file.close();
 93 
 94     // 快速查找
 95     input_file.open("D:\\test.txt");
 96     if(!input_file)
 97         return -2;
 98     while (input_file.getline(buff, 10240-1))
 99     {
100         // 使用哈希表,存储海量数据,能够快速查找
101         if (find_in_hashtable(hashtable, buff))
102             log_file.write("Found it !\n", strlen("Found it !\n"));
103         else
104             log_file.write("Missed it !\n", strlen("Missed it !\n"));
105     }
106     input_file.close();
107     log_file.close();
108 
109     return 0;
110 }

 

 

posted @ 2013-09-22 23:30  铁甲小宝  阅读(219)  评论(0)  编辑  收藏  举报