明亮

导航

hash查找与二分查找

今晚心血来潮,实现了一下hash查找,查找速度很快。

代码如下所示:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

/* Number of buckets: the size passed to init_hashtab() and the modulus used by hash_func(). */
#define TABLE_SIZE 100
/* Type of the values stored in the data array and in the hash-table nodes. */
typedef int elemtype;
/* Number of random elements generated in main(); also the modulus applied to rand(). */
#define ELEM_COUNT 3000000

/*
 * One node of a singly linked chain of values.
 * data holds the stored value; next points to the following node and is
 * NULL on the last node of the chain.
 */
typedef struct st_node
{
	elemtype data;
	struct st_node *next;
}node;
/* Pointer-to-node alias; the hash table itself is an array of these (nodep_t *). */
typedef node * nodep_t;

nodep_t * init_hashtab(int tab_size);// allocate a zeroed bucket array for the hash table
int hash_func(elemtype key);// hash function: maps a key to a bucket index
int create_hashtab(nodep_t *hashtab, elemtype *data, int data_num);// populate the hash table from an array
nodep_t	get_tail_node(nodep_t pHead);// get the tail node of a linked chain
nodep_t hs_search(nodep_t *hashtab, elemtype elem_val);// find the node that stores the given value

int compare(const void *elem1, const void * elem2);// comparison function used by qsort
int binary_search(elemtype *data, int min, int max, elemtype in_val);// binary search 

/*
 * Milliseconds elapsed between two gettimeofday() samples.
 * The difference is computed in microseconds first so that a negative
 * tv_usec delta is handled correctly, and the result is returned as a
 * long so it can be printed with %ld.
 */
static long elapsed_ms(const struct timeval *start, const struct timeval *end)
{
	long long usec = (long long)(end->tv_sec - start->tv_sec) * 1000000LL
			+ (long long)(end->tv_usec - start->tv_usec);

	return (long)(usec / 1000);
}

/* Release every node of every chain, then the bucket array itself. */
static void free_hashtab(nodep_t *hashtab, int tab_size)
{
	int i = 0;

	if (NULL == hashtab)
	{
		return;
	}

	for (i = 0; i < tab_size; i++)
	{
		nodep_t cur = hashtab[i];

		while (cur != NULL)
		{
			nodep_t following = cur->next;

			free(cur);
			cur = following;
		}
	}

	free(hashtab);
}

/*
 * Benchmark driver: fills an array with ELEM_COUNT random values, then
 * times (1) building the hash table, (2) one hash lookup, (3) qsort of the
 * array and (4) one binary search for the same value, printing each
 * duration in milliseconds.
 *
 * Returns 0 on success; exits with status -1 on allocation failure or when
 * either search does not find the probe value.
 */
int main(int argc, char *argv[])
{
	int r = 0;
	nodep_t *pHashTable = NULL;
	nodep_t search_node = NULL;
	elemtype *data = NULL;
	elemtype in_val;
	elemtype search_val;
	int i = 0;
	int bs_index = 0;
	struct timeval tv_start, tv_end;

	/* Command-line arguments are not used. */
	(void)argc;
	(void)argv;

	printf("数据总量为%d\n", ELEM_COUNT);

	data = malloc(sizeof *data * ELEM_COUNT);
	if (NULL == data)
	{
		printf("data malloc failed\n");
		exit(-1);
	}

	for (i = 0; i < ELEM_COUNT; i++)
	{
		data[i] = rand() % ELEM_COUNT;
	}

	/* Probe value: taken from the data so both searches are expected to hit. */
	in_val = data[ELEM_COUNT / 2];

	pHashTable = init_hashtab(TABLE_SIZE);
	if (NULL == pHashTable)
	{
		printf("init_hashtab --error\n");
		exit(-1);
	}

	gettimeofday(&tv_start, NULL);
	r = create_hashtab(pHashTable, data, ELEM_COUNT);
	gettimeofday(&tv_end, NULL);
	if (r != 0)
	{
		/* A partially built table would make the lookup result meaningless. */
		printf("create_hashtab --error\n");
		exit(-1);
	}
	printf("建立hash表所化时间为%ld毫秒\n", elapsed_ms(&tv_start, &tv_end));

	gettimeofday(&tv_start, NULL);
	search_node = hs_search(pHashTable, in_val);
	gettimeofday(&tv_end, NULL);

	if (search_node == NULL)
	{
		printf("没找到\n");
		exit(-1);
	}
	search_val = search_node->data;

	printf("用hash查找, %d查找出来的索引是%d, 查找出来的值是%d\n", in_val, hash_func(in_val), search_val);
	printf("hash查找所化时间为%ld毫秒\n", elapsed_ms(&tv_start, &tv_end));

	/* Binary search needs sorted input, so the sort is timed separately. */
	gettimeofday(&tv_start, NULL);
	qsort(data, ELEM_COUNT, sizeof(elemtype), compare);
	gettimeofday(&tv_end, NULL);

	printf("快速排序所化时间为%ld毫秒\n", elapsed_ms(&tv_start, &tv_end));

	gettimeofday(&tv_start, NULL);
	bs_index = binary_search(data, 0, ELEM_COUNT - 1, in_val);
	gettimeofday(&tv_end, NULL);
	if (bs_index < 0)
	{
		printf("二分查找失败\n");
		exit(-1);
	}

	printf("用二分查找, %d查找出来的索引是%d, 查找出来的值是%d\n", in_val, bs_index, data[bs_index]);
	printf("二分查找所化时间为%ld毫秒\n", elapsed_ms(&tv_start, &tv_end));

	free_hashtab(pHashTable, TABLE_SIZE);
	free(data);

	return 0;
}


/*
 * Allocate an empty hash table: an array of tab_size bucket head pointers.
 *
 * The array is zero-filled by calloc, so every bucket starts out as an
 * empty chain (this assumes an all-zero-bytes pointer is NULL, as the
 * original memset-based code did).
 *
 * Returns the new array, or NULL if tab_size is not positive or the
 * allocation fails. The caller owns the returned memory.
 */
nodep_t * init_hashtab(int tab_size)
{
	if (tab_size <= 0)
	{
		return NULL;
	}

	/* Each element is a bucket head (nodep_t), not a pointer to one. */
	return calloc((size_t)tab_size, sizeof(nodep_t));
}

/*
 * Insert data[0 .. data_num-1] into the chained hash table.
 *
 * Each value is appended to the end of the chain of bucket
 * hash_func(value), so values within a bucket keep their input order.
 * hashtab must have at least TABLE_SIZE buckets, because hash_func()
 * returns indices modulo TABLE_SIZE.
 *
 * Returns 0 on success, -1 if hashtab is NULL, data is NULL while
 * data_num > 0, or a node allocation fails (nodes inserted before the
 * failure stay in the table).
 */
int create_hashtab(nodep_t *hashtab, elemtype *data, int data_num)
{
	/*
	 * Last node of each bucket touched during this call. Remembering it
	 * avoids re-walking the whole chain on every insert.
	 */
	nodep_t tails[TABLE_SIZE] = {NULL};
	int i = 0;

	if (NULL == hashtab || (NULL == data && data_num > 0))
	{
		return -1;
	}

	for (i = 0; i < data_num; i++)
	{
		int hash_addr = hash_func(data[i]);
		/* Allocate a full node; sizeof(nodep_t) would only be pointer-sized. */
		nodep_t new_node = malloc(sizeof *new_node);

		if (NULL == new_node)
		{
			return -1;
		}
		new_node->data = data[i];
		new_node->next = NULL;

		if (hashtab[hash_addr] == NULL)
		{
			hashtab[hash_addr] = new_node;
		}
		else
		{
			/* Bucket was filled before this call: locate its tail once. */
			if (tails[hash_addr] == NULL)
			{
				tails[hash_addr] = get_tail_node(hashtab[hash_addr]);
			}
			tails[hash_addr]->next = new_node;
		}
		tails[hash_addr] = new_node;
	}

	return 0;
}

/*
 * Hash function: maps key to a bucket index in [0, TABLE_SIZE).
 *
 * Only the first two bytes of key's in-memory representation are used:
 * the index is (b0*b0 + b1*b1) % TABLE_SIZE with each byte read as a plain
 * char. The result therefore depends on the platform's byte order and on
 * whether char is signed; it is never negative because both terms are
 * squares.
 */
int hash_func(elemtype key)
{
	/* Inspecting an object's bytes through a char pointer is well defined. */
	const char *p = (const char *)&key;
	char ch1 = p[0];
	char ch2 = p[1];

	return (ch1 * ch1 + ch2 * ch2) % TABLE_SIZE;
}

/*
 * Return the last node of the chain starting at pHead, i.e. the node
 * whose next pointer is NULL. Returns NULL when pHead itself is NULL
 * (empty chain).
 */
nodep_t	get_tail_node(nodep_t pHead)
{
	nodep_t p = pHead;

	if (NULL == p)
	{
		return NULL;
	}

	while (p->next != NULL)
	{
		p = p->next;
	}

	return p;
}

/*
 * Look up elem_val in the chained hash table.
 *
 * Walks the chain of bucket hash_func(elem_val) from its head and returns
 * the first node whose data equals elem_val. Returns NULL if hash_tab is
 * NULL, the bucket is empty, or no node in the chain matches.
 */
nodep_t hs_search(nodep_t *hash_tab, elemtype elem_val)
{
	nodep_t cur = NULL;

	if (NULL == hash_tab)
	{
		return NULL;
	}

	for (cur = hash_tab[hash_func(elem_val)]; cur != NULL; cur = cur->next)
	{
		if (cur->data == elem_val)
		{
			return cur;
		}
	}

	return NULL;
}

/*
 * qsort comparison function for int elements, ascending order.
 *
 * Returns a negative value, zero, or a positive value when *elem1 is less
 * than, equal to, or greater than *elem2. The result is built from two
 * comparisons instead of a subtraction, because the subtraction overflows
 * (undefined behavior, wrong sign) when the operands are far apart, e.g.
 * INT_MAX and -1.
 */
int compare(const void *elem1, const void * elem2)
{
	const int lhs = *(const int *)elem1;
	const int rhs = *(const int *)elem2;

	return (lhs > rhs) - (lhs < rhs);
}

/*
 * Binary search for in_val in data[min .. max] (both bounds inclusive).
 * The range must be sorted in ascending order.
 *
 * Returns the index of an element equal to in_val, or -1 if data is NULL,
 * the range is empty (min > max), or no element matches. A one-element
 * range (min == max) is searched like any other.
 */
int binary_search(elemtype *data, int min, int max, elemtype in_val)
{
	if (NULL == data)
	{
		return -1;
	}

	/* An empty range (min > max) skips the loop and falls through to -1. */
	while (min <= max)
	{
		/* Written this way so min + max cannot overflow. */
		int mid = (max - min) / 2 + min;

		if (in_val > data[mid])
		{
			min = mid + 1;
		}
		else if (in_val < data[mid])
		{
			max = mid - 1;
		}
		else
		{
			return mid;
		}
	}

	return -1;
}

 

 

 

从结果来看,查询时间几乎为0,所花的时间基本上都用在建立hash表上。hash查找相对于二分查找的优势没有体现出来,是因为数据量还不够大。可惜如果数据量再大一些,计算机就承受不了,一直发热,所以就测到这里为止。

posted on 2013-03-09 18:23  灭人沉醉  阅读(155)  评论(0)    收藏  举报