hash查找与二分查找
今晚心血来潮,实现了一下hash查找,查找速度很快。至于hash查找到底有多快,又用二分查找做了比较。
代码如下所示:
/*
 * Small benchmark comparing lookup in a chained hash table against
 * qsort + binary search over the same array of random integers.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

#define TABLE_SIZE 100
typedef int elemtype;
#define ELEM_COUNT 3000000

/* One node of a bucket's singly linked collision chain. */
typedef struct st_node
{
    elemtype data;
    struct st_node *next;
} node;
typedef node *nodep_t;

nodep_t *init_hashtab(int tab_size);                                /* allocate an empty bucket array */
int hash_func(elemtype key);                                        /* map a key to a bucket index */
int create_hashtab(nodep_t *hashtab, elemtype *data, int data_num); /* insert all elements */
nodep_t get_tail_node(nodep_t pHead);                               /* last node of a chain */
nodep_t hs_search(nodep_t *hashtab, elemtype elem_val);             /* find the node holding a value */

int compare(const void *elem1, const void *elem2);                  /* qsort comparator */
int binary_search(elemtype *data, int min, int max, elemtype in_val);

/* Elapsed wall-clock time between two gettimeofday() samples, in milliseconds. */
static long elapsed_ms(const struct timeval *start, const struct timeval *end)
{
    long long usec = (long long)(end->tv_sec - start->tv_sec) * 1000000LL
                   + (long long)(end->tv_usec - start->tv_usec);

    return (long)(usec / 1000);
}

/* Release every chain node of the table and then the bucket array itself. */
static void free_hashtab(nodep_t *hashtab, int tab_size)
{
    if (hashtab == NULL)
    {
        return;
    }

    for (int i = 0; i < tab_size; i++)
    {
        nodep_t cur = hashtab[i];

        while (cur != NULL)
        {
            nodep_t next = cur->next;
            free(cur);
            cur = next;
        }
    }

    free(hashtab);
}

int main(int argc, char *argv[])
{
    int r = 0;
    nodep_t *pHashTable = NULL;
    nodep_t search_node = NULL;
    elemtype *data = NULL;
    elemtype in_val;
    elemtype search_val;
    int bs_index = 0;
    struct timeval tvafter_hash, tvpre_hash;
    struct timeval tvafter_hash2, tvpre_hash2;
    struct timeval tvafter_bs, tvpre_bs;
    struct timeval tvafter_bs2, tvpre_bs2;

    (void)argc;
    (void)argv;

    printf("数据总量为%d\n", ELEM_COUNT);

    data = malloc(sizeof *data * ELEM_COUNT);
    if (NULL == data)
    {
        printf("data malloc failed\n");
        exit(-1);
    }

    for (int i = 0; i < ELEM_COUNT; i++)
    {
        data[i] = rand() % ELEM_COUNT;
    }

    /* Pick the probe value before sorting so both searches look for the same key. */
    in_val = data[ELEM_COUNT / 2];

    pHashTable = init_hashtab(TABLE_SIZE);
    if (NULL == pHashTable)
    {
        printf("init_hashtab --error\n");
        free(data);
        exit(-1);
    }

    gettimeofday(&tvpre_hash, NULL);
    r = create_hashtab(pHashTable, data, ELEM_COUNT);
    gettimeofday(&tvafter_hash, NULL);
    if (r != 0)
    {
        /* A partially built table would make the search result meaningless. */
        printf("create_hashtab --error\n");
        free_hashtab(pHashTable, TABLE_SIZE);
        free(data);
        exit(-1);
    }
    printf("建立hash表所化时间为%ld毫秒\n", elapsed_ms(&tvpre_hash, &tvafter_hash));

    gettimeofday(&tvpre_hash2, NULL);
    search_node = hs_search(pHashTable, in_val);
    gettimeofday(&tvafter_hash2, NULL);

    if (search_node == NULL)
    {
        printf("没找到\n");
        exit(-1);
    }
    search_val = search_node->data;

    printf("用hash查找, %d查找出来的索引是%d, 查找出来的值是%d\n", in_val, hash_func(in_val), search_val);
    printf("hash查找所化时间为%ld毫秒\n", elapsed_ms(&tvpre_hash2, &tvafter_hash2));

    gettimeofday(&tvpre_bs, NULL);
    qsort(data, ELEM_COUNT, sizeof(elemtype), compare);
    gettimeofday(&tvafter_bs, NULL);

    printf("快速排序所化时间为%ld毫秒\n", elapsed_ms(&tvpre_bs, &tvafter_bs));

    gettimeofday(&tvpre_bs2, NULL);
    bs_index = binary_search(data, 0, ELEM_COUNT - 1, in_val);
    gettimeofday(&tvafter_bs2, NULL);
    if (bs_index < 0)
    {
        printf("二分查找失败\n");
        exit(-1);
    }

    printf("用二分查找, %d查找出来的索引是%d, 查找出来的值是%d\n", in_val, bs_index, data[bs_index]);
    printf("二分查找所化时间为%ld毫秒\n", elapsed_ms(&tvpre_bs2, &tvafter_bs2));

    free_hashtab(pHashTable, TABLE_SIZE);
    free(data);

    return 0;
}

/*
 * Allocate a bucket array of tab_size entries, all initialised to empty.
 * Returns NULL if tab_size is not positive or the allocation fails.
 * The caller owns the returned array.
 */
nodep_t *init_hashtab(int tab_size)
{
    if (tab_size <= 0)
    {
        return NULL;
    }

    /* Element type is nodep_t (a node pointer), so size by the element, not by nodep_t *. */
    return calloc((size_t)tab_size, sizeof(nodep_t));
}

/*
 * Insert data[0..data_num-1] into hashtab, appending each element to the
 * tail of its bucket's chain (duplicates are stored as separate nodes).
 * Returns 0 on success, -1 on invalid arguments or allocation failure;
 * on failure the nodes inserted so far remain in the table.
 *
 * Note: appending walks the whole chain, so building costs time
 * proportional to the chain length for every insertion.
 */
int create_hashtab(nodep_t *hashtab, elemtype *data, int data_num)
{
    if (hashtab == NULL || (data == NULL && data_num > 0))
    {
        return -1;
    }

    for (int i = 0; i < data_num; i++)
    {
        /* Allocate a full node: sizeof *new_node, not the size of the pointer type. */
        nodep_t new_node = malloc(sizeof *new_node);
        if (new_node == NULL)
        {
            return -1;
        }
        new_node->data = data[i];
        new_node->next = NULL;

        int hash_addr = hash_func(data[i]);
        if (hashtab[hash_addr] == NULL)
        {
            hashtab[hash_addr] = new_node;
        }
        else
        {
            get_tail_node(hashtab[hash_addr])->next = new_node;
        }
    }

    return 0;
}

/*
 * Hash a key to a bucket index in [0, TABLE_SIZE): the sum of the squares
 * of the first two bytes of the key's object representation, modulo
 * TABLE_SIZE. Requires sizeof(elemtype) >= 2.
 */
int hash_func(elemtype key)
{
    char bytes[2];

    /* Copy the raw bytes instead of aliasing &key through a mistyped pointer. */
    memcpy(bytes, &key, sizeof bytes);

    return (bytes[0] * bytes[0] + bytes[1] * bytes[1]) % TABLE_SIZE;
}

/* Return the last node of the chain starting at pHead, or NULL for an empty chain. */
nodep_t get_tail_node(nodep_t pHead)
{
    nodep_t p = pHead;

    if (p == NULL)
    {
        return NULL;
    }

    while (p->next != NULL)
    {
        p = p->next;
    }

    return p;
}

/*
 * Look up elem_val: returns the first node in its bucket's chain whose data
 * equals elem_val, or NULL if the table is NULL or no such node exists.
 */
nodep_t hs_search(nodep_t *hash_tab, elemtype elem_val)
{
    if (hash_tab == NULL)
    {
        return NULL;
    }

    nodep_t cur = hash_tab[hash_func(elem_val)];

    while (cur != NULL && cur->data != elem_val)
    {
        cur = cur->next;
    }

    return cur;
}

/* qsort comparator for elemtype: negative, zero or positive for <, ==, >. */
int compare(const void *elem1, const void *elem2)
{
    elemtype a = *(const elemtype *)elem1;
    elemtype b = *(const elemtype *)elem2;

    /* Comparison form cannot overflow, unlike a - b. */
    return (a > b) - (a < b);
}

/*
 * Binary search for in_val in the ascending-sorted range data[min..max]
 * (both bounds inclusive). Returns the index of a matching element, or -1
 * if data is NULL, the range is empty, or the value is absent.
 */
int binary_search(elemtype *data, int min, int max, elemtype in_val)
{
    if (data == NULL)
    {
        return -1;
    }

    /* min == max is a valid one-element range; the loop handles it. */
    while (min <= max)
    {
        int mid = (max - min) / 2 + min;

        if (in_val > data[mid])
        {
            min = mid + 1;
        }
        else if (in_val < data[mid])
        {
            max = mid - 1;
        }
        else
        {
            return mid;
        }
    }

    return -1;
}



从结果来看,查询时间几乎为0,所花时间基本上都在建立hash表上。hash查找相对二分查找的优势没有体现出来,是因为数据量还不够大。可惜如果数据量更大的话,一是建hash表的时间过长,二是计算机承受不了,一直发热,所以测到这里就ok了。但是从理论上来看,hash查找的平均时间复杂度为O(1)(前提是桶足够多、冲突链足够短),二分查找的则为O(logn);hash查找的劣势应该就是建hash表花时间过多,当然这也跟hash表的设计有关(例如本例只有100个桶,且每次插入都要遍历到链表尾部)。从实验来看,hash查找适用于数据量巨大、查询频繁的情况。
浙公网安备 33010602011771号