明亮

导航

hash查找与二分查找

今晚心血来潮,实现了一下hash查找,查找速度很快。为了弄清hash查找到底有多快,又实现了二分查找作为对比。

代码如下所示:

  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <sys/time.h>
  4 
  /* Number of buckets in the chained hash table. */
  #define TABLE_SIZE 100
  /* Number of elements generated, hashed, sorted and searched. */
  #define ELEM_COUNT 3000000

  /* Value type stored in the hash table and in the sorted array. */
  typedef int elemtype;

  /* One link of a bucket's singly linked chain. */
  struct st_node
  {
      elemtype data;          /* value held by this node */
      struct st_node *next;   /* following node in the same bucket, or NULL */
  };
  typedef struct st_node node;
  typedef node *nodep_t;
 15 
 16 nodep_t * init_hashtab(int tab_size);//初始化hash表
 17 int hash_func(elemtype key);//hash函数
 18 int create_hashtab(nodep_t *hashtab, elemtype *data, int data_num);//创建hash表
 19 nodep_t    get_tail_node(nodep_t pHead);//获取该恋表的尾指针
 20 nodep_t hs_search(nodep_t *hashtab, elemtype elem_val);//根据元素的值,查找出所存储的节点
 21 
 22 int compare(const void *elem1, const void * elem2);//用于快排的比较函数
 23 int binary_search(elemtype *data, int min, int max, elemtype in_val);//二分查找 
 24 
 25 int main(int argc, char *argv[])
 26 {
 27     int r = 0;
 28     nodep_t * pHashTable = NULL, search_node = NULL; 
 29     elemtype *data = NULL, in_val, search_val;
 30     int i = 0;
 31     int hash_index = 0, bs_index = 0;
 32     struct timeval tvafter_hash,tvpre_hash;
 33     struct timeval tvafter_hash2,tvpre_hash2;
 34     struct timeval tvafter_bs,tvpre_bs;
 35     struct timeval tvafter_bs2,tvpre_bs2;
 36 
 37     printf("数据总量为%d\n", ELEM_COUNT);
 38 
 39     data = (elemtype *)malloc(sizeof(elemtype) * ELEM_COUNT);
 40     if (NULL == data)
 41     {
 42         printf("data malloc failed\n");
 43         exit(-1);
 44     }
 45 
 46     for (i = 0; i < ELEM_COUNT; i++)
 47     {
 48         data[i] = rand() % ELEM_COUNT;
 49     }
 50 
 51 
 52     in_val = data[ELEM_COUNT / 2];
 53 
 54     pHashTable = init_hashtab(TABLE_SIZE);
 55     
 56     gettimeofday (&tvpre_hash , NULL);
 57     r = create_hashtab(pHashTable, data, ELEM_COUNT);
 58     if ( r != 0){
 59         printf("create_hashtab --error\n");
 60     }
 61     gettimeofday (&tvafter_hash, NULL);
 62     printf("建立hash表所化时间为%d毫秒\n", 
 63                 (tvafter_hash.tv_sec-tvpre_hash.tv_sec)*1000+(tvafter_hash.tv_usec-tvpre_hash.tv_usec)/1000);
 64 
 65     gettimeofday (&tvpre_hash2 , NULL);
 66     search_node = hs_search(pHashTable, in_val);
 67     gettimeofday (&tvafter_hash2, NULL);
 68 
 69     if ( search_node == NULL)
 70     {
 71         printf("没找到\n");
 72         exit(-1);
 73     }
 74     else
 75     {
 76         search_val = search_node->data;
 77     }
 78 
 79     printf("用hash查找, %d查找出来的索引是%d, 查找出来的值是%d\n", in_val, hash_func(in_val), search_val);
 80     printf("hash查找所化时间为%d毫秒\n", 
 81                 (tvafter_hash2.tv_sec-tvpre_hash2.tv_sec)*1000+(tvafter_hash2.tv_usec-tvpre_hash2.tv_usec)/1000);
 82 
 83     gettimeofday (&tvpre_bs , NULL);
 84     qsort(data, ELEM_COUNT, sizeof(elemtype), compare); 
 85     gettimeofday (&tvafter_bs, NULL);
 86 
 87     printf("快速排序所化时间为%d毫秒\n",(tvafter_bs.tv_sec-tvpre_bs.tv_sec)*1000+(tvafter_bs.tv_usec-tvpre_bs.tv_usec)/1000);
 88     
 89     gettimeofday (&tvpre_bs2, NULL);
 90     bs_index = binary_search(data, 0, ELEM_COUNT - 1, in_val);
 91     if (bs_index < 0)
 92     {
 93         printf("二分查找失败\n");
 94         exit(-1);
 95     }
 96     gettimeofday (&tvafter_bs2, NULL);
 97 
 98     printf("用二分查找, %d查找出来的索引是%d, 查找出来的值是%d\n", in_val, bs_index, data[bs_index]);
 99     printf("二分查找所化时间为%d毫秒\n",
100                 (tvafter_bs2.tv_sec-tvpre_bs2.tv_sec)*1000+(tvafter_bs2.tv_usec-tvpre_bs2.tv_usec)/1000);
101     
102     return 0;
103 }
104 
105 
106 nodep_t * init_hashtab(int tab_size)//建hash表
107 {
108     nodep_t *ppNode = NULL;
109 
110     ppNode = (nodep_t *)malloc(sizeof(nodep_t *) * tab_size);
111     if (NULL == ppNode)
112     {
113         return NULL;
114     }
115     else 
116     {
117         memset(ppNode, 0, sizeof(nodep_t *) * tab_size);
118         return ppNode;
119     }
120 }
121 
122 int create_hashtab(nodep_t *hashtab, elemtype *data, int data_num)//创建hash表
123 {
124     int i = 0;
125     int hash_addr = 0;
126 
127     for (i = 0; i < data_num; i++)
128     {
129         hash_addr = hash_func(data[i]);
130         if (hashtab[hash_addr] == NULL)
131         {
132             hashtab[hash_addr] = (nodep_t)malloc(sizeof(nodep_t));
133             memset(hashtab[hash_addr], 0, sizeof(nodep_t));
134 
135             hashtab[hash_addr]->data = data[i];
136             hashtab[hash_addr]->next = NULL;
137         }
138         else
139         {
140             nodep_t new_node = (nodep_t)malloc(sizeof(node));
141             new_node->data = data[i];
142             new_node->next = NULL;
143             
144             //接下来需要找到尾指针,并使其指向新创建的节点上
145             get_tail_node(hashtab[hash_addr])->next = new_node;
146         }
147     }
148 
149     return 0;
150 }
151 
152 int hash_func(elemtype key)//hash函数
153 {
154     char ch1= 0, ch2 = 0;
155     char *p = &key;
156 
157     memcpy(&ch1, p, sizeof(char));
158     memcpy(&ch2, p + sizeof(char), sizeof(char));
159 
160     return (ch1 * ch1 + ch2 * ch2) % TABLE_SIZE;
161 }
162 
163 nodep_t    get_tail_node(nodep_t pHead)//获取该恋表的尾指针
164 {
165     nodep_t p = pHead;
166 
167     while (p->next != NULL)
168     {
169         p = p->next;
170     }
171 
172     return p;
173 }
174 
175 nodep_t hs_search(nodep_t *hash_tab, elemtype elem_val)
176 {
177     int index = hash_func(elem_val);
178 
179     if (hash_tab[index] == NULL)
180     {
181         return NULL;
182     }
183 
184     if (hash_tab[index]->data != elem_val)
185     {
186         nodep_t tmpNode = hash_tab[index];
187         
188         while (tmpNode != NULL && tmpNode->data != elem_val)
189         {
190             tmpNode = tmpNode->next;
191         }
192 
193         return (tmpNode == NULL)?NULL:tmpNode;
194     }
195     else
196     {
197         return hash_tab[index];
198     }
199 }
200 
/*
 * qsort comparison callback for int elements in ascending order.
 *
 * Returns a negative value, zero, or a positive value when *elem1 is less
 * than, equal to, or greater than *elem2. The result is built from
 * comparisons rather than a subtraction, which would overflow (undefined
 * behavior, and a wrong sign in practice) for operands that are far apart.
 */
int compare(const void *elem1, const void * elem2)
{
    const int lhs = *(const int *)elem1;
    const int rhs = *(const int *)elem2;

    return (lhs > rhs) - (lhs < rhs);
}
205 
206 int binary_search(elemtype *data, int min, int max, elemtype in_val)//二分查找 
207 {
208     int mid = 0;
209 
210     if ( min >= max){
211         return -1;
212     }
213 
214     while (min <= max)
215     {
216         mid = (max - min) / 2 + min;
217 
218         if (in_val > data[mid])
219         {
220             min = mid + 1;
221         }
222         else if (in_val < data[mid])
223         {
224             max = mid - 1;
225         }
226         else
227         {
228             return mid;
229         }
230     }
231 
232     return -1;
233 }

 

 

 

从结果来看,查询时间几乎为0,所花的时间基本上都在建立hash表上。hash查找相对于二分查找的优势没有体现出来,是因为数据量还不够大。可惜如果数据量更大的话,一是建hash表的时间过长,二是计算机承受不了,一直发热,所以测到这里就ok了。但是从理论上来看,在冲突较少的理想情况下,hash查找的时间复杂度为O(1),二分查找的则为O(log n)。hash查找的劣势应该就是建hash表花的时间过多,当然这也跟hash表的设计有关。从实验来看,hash查找适用于数据量巨大、查询频繁的情况。

posted on 2013-03-09 18:38  灭人沉醉  阅读(2210)  评论(2)    收藏  举报