Hash map with linked lists

#if 1
#define SEARCHWORD_MAXLEN 12
#define PREFIX_MAXLEN 8

// The helper functions below are provided for your reference; they are
// already uncommented and ready to use.

// Compares two NUL-terminated strings character by character.
// Returns 0 when they are equal; otherwise the difference between the first
// pair of characters that differ (negative when a sorts before b).
int mstrcmp(const char *a, const char *b)
{
	// Advance both cursors while the strings agree and a has not ended.
	while (*a != '\0' && *a == *b)
	{
		a++;
		b++;
	}
	return *a - *b;
}

// Compares at most len characters of two NUL-terminated strings.
// Returns 0 when the compared portions are equal; otherwise the difference
// between the first pair of characters that differ.
// The comparison stops at the terminator, so bytes stored after the end of
// the strings are never read or compared.
int mstrncmp(const char *a, const char *b, int len)
{
	for (int i = 0; i < len; i++)
	{
		if (a[i] != b[i])
			return a[i] - b[i];
		// Both strings ended here (a[i] == b[i] == '\0'): they are equal.
		if (a[i] == '\0')
			return 0;
	}
	return 0;
}

// Returns the number of characters in a before its NUL terminator.
int mstrlen(const char *a)
{
	const char *cursor = a;

	// Walk forward until the terminator is reached.
	for (; *cursor != '\0'; cursor++)
	{
	}

	return (int)(cursor - a);
}

// Copies the NUL-terminated string src, terminator included, into dest.
// dest must be large enough to hold it; no bounds are checked here.
void mstrcpy(char *dest, const char *src)
{
	// Copy one character per iteration; the loop ends right after the
	// terminator itself has been stored.
	for (;;)
	{
		*dest = *src;
		if (*src == '\0')
			break;
		dest++;
		src++;
	}
}

// Copies exactly len characters from src into dest and then writes a NUL
// terminator at dest[len]. dest therefore needs room for len + 1 characters,
// and src must provide at least len readable characters.
void mstrncpy(char *dest, const char *src, int len)
{
	char *out = dest;
	const char *in = src;

	for (int remaining = len; remaining > 0; remaining--)
		*out++ = *in++;

	dest[len] = '\0';
}


// One stored word together with its search count. The same type is used for
// the bucket heads in datalst, for the chained entries, and for result[].
struct NODE
{
	char word[20];	// NUL-terminated word text
	int freq;	// set to 1 on insertion and incremented by search() on each repeat
	NODE * next;	// next node in the same bucket chain, 0 at the tail
};

// Bucket heads indexed as datalst[prefix length][hash code]. Only the rows
// for prefix lengths 4, 5, 6, 7 and 8 are reset by init() and filled by search().
NODE datalst[13][5001];

// Candidate list filled by autoComplete() and consumed by get_max_value().
NODE result[100];
// Number of entries of result[] currently in use.
int result_cnt = 0;

// Hashes the first len characters of str (or fewer, if the string ends
// earlier) with a base-131 polynomial hash.
// Returns a bucket index in the range [0, 5000].
int get_hash_code(char* str, int len)
{
	unsigned int hash_code = 0;

	// The bound is checked before str[i] is read, so str[len] is never
	// touched even when the buffer holds exactly len characters.
	for (int i = 0; i < len && str[i] != '\0'; i++)
	{
		hash_code = hash_code * 131 + str[i];
	}

	return (int)(hash_code % 5001);
}

void init()
{
	//每个计算一下 4~8个字符的hash
	int i = 0;
	int j = 0;
	for (i = 4; i <= 8; i++)
	{
		for (j = 0; j < 5001; j++)
		{
			datalst[i][j].freq = 0;
			datalst[i][j].next = 0;
			datalst[i][j].word[0] = '\0';
		}
	}
}

void search(char searchWord[SEARCHWORD_MAXLEN + 1])
{
	int len = mstrlen(searchWord);
	int i = 0;

	if (len > 8)
		len = 8;
	for (i = 4; i <= len; i++)
	{
		int hash_code = get_hash_code(searchWord, i);
		int is_exist = false;

		NODE *tmp = &datalst[i][hash_code];
		while (tmp != 0)
		{
			if (mstrcmp(tmp->word, searchWord) == 0)
			{
				tmp->freq++;
				is_exist = true;
				break;
			}
			if (tmp->next == 0)
			{
				break;
			}
			tmp = tmp->next;
		}

		if (is_exist == false)
		{
			NODE *node = new NODE();
			node->freq = 1;
			node->next = 0;
			mstrcpy(node->word, searchWord);
			tmp->next = node;
		}
	}
}

// Picks the best entry among result[0 .. result_cnt), returns a copy of it,
// and blanks that slot (freq 0, empty word) so the next call selects the
// runner-up. "Best" means highest freq; equal freq is decided by the
// lexicographically smaller word, a shorter word winning over a longer word
// that it is a prefix of. The earlier entry is kept on a complete tie.
NODE get_max_value()
{
	int best = 0;

	for (int idx = 1; idx < result_cnt; idx++)
	{
		const NODE *cand = &result[idx];
		const NODE *top = &result[best];

		if (cand->freq > top->freq)
		{
			best = idx;
			continue;
		}
		if (cand->freq < top->freq)
			continue;

		// Equal frequency: skip the common leading part of both words.
		int k = 0;
		while (cand->word[k] != '\0' && top->word[k] != '\0' &&
			cand->word[k] == top->word[k])
		{
			k++;
		}

		if (cand->word[k] != '\0' && top->word[k] != '\0')
		{
			// First differing character decides.
			if (cand->word[k] < top->word[k])
				best = idx;
		}
		else if (cand->word[k] == '\0' && top->word[k] != '\0')
		{
			// Candidate is a proper prefix of the current best.
			best = idx;
		}
	}

	NODE winner = result[best];
	result[best].freq = 0;
	result[best].word[0] = '\0';

	return winner;
}

int autoComplete(char prefix[PREFIX_MAXLEN + 1], char retWords[5][SEARCHWORD_MAXLEN + 1])
{
	for (int j = 0; j < 100; j++)
	{
		result[j].freq = 0;
		result[j].word[0] = '\0';
		result[j].next = 0;
	}
	result_cnt = 0;

	int len = mstrlen(prefix);
	int hash_code = get_hash_code(prefix, len);

	NODE *tmp = &datalst[len][hash_code];
	while (tmp != 0)
	{
		if (mstrncmp(tmp->word, prefix, len) == 0)
		{
			result[result_cnt].freq = tmp->freq;
			mstrcpy(result[result_cnt].word, tmp->word);
			result_cnt++;
		}
		if (tmp->next == 0)
		{
			break;
		}
		tmp = tmp->next;
	}

	for (int i = 0; i < result_cnt && i < 5; i++)
	{
		mstrcpy(retWords[i], get_max_value().word);
	}

	if (result_cnt > 5)
		result_cnt = 5;
	return result_cnt;
}
#endif

  

posted @ 2019-04-13 15:28  调皮的贝叶斯  阅读(169)  评论(0)    收藏  举报