词频统计器(第三版)

为了便于进行效能测试,将程序拆分成多个函数来写,并将统计结果按单词出现次数从高到低排序。

#include<stdio.h>
#include<string.h>
#include<iostream>
#include<fstream>
#include<string>
#include<map> 
#include <iomanip>
#include<vector>
#include<algorithm>
#include<fstream>
using namespace std;
// Input stream for the text being analysed; assigned in main(), read and closed by sum().
FILE *fp1;
// Occurrence count keyed by lower-cased token; filled by sum().
map<string, int>my_map;
// (token, count) pairs copied from my_map and ordered by paixu(); read by out().
vector<pair<string, int> > v_result;
// Ordering predicate for sorting (word, count) pairs by count, highest first.
// Yields 1 when x has a strictly larger count than y, otherwise 0; the word
// itself never takes part in the comparison.
int cmp(const pair<string, int> &x, const pair<string, int> &y)
{
	const int left_count = x.second;
	const int right_count = y.second;
	if (right_count < left_count)
		return 1;
	return 0;
}
// Appends every (word, count) entry of my_map to t_vec, then orders the whole
// of t_vec with cmp (largest count first).
//
// my_map : source of the entries; it is only read.
// t_vec  : receives the entries. Anything already stored in it is kept and
//          takes part in the sort.
//
// A stable sort is used so that entries with equal counts keep the order in
// which they were appended. Because a map is iterated in key order, ties
// coming from my_map end up alphabetical instead of in an unspecified order.
void sortMapbyValue(map<string, int>&my_map, vector<pair<string, int> >&t_vec)
{
	// One allocation up front instead of repeated growth while appending.
	t_vec.reserve(t_vec.size() + my_map.size());
	t_vec.insert(t_vec.end(), my_map.begin(), my_map.end());
	stable_sort(t_vec.begin(), t_vec.end(), cmp);
}
// Reads the stream fp1 to its end, counts every word in the global my_map and
// closes the stream.
//
// A word is a maximal run of ASCII letters and '-' characters; upper-case
// letters are folded to lower case before counting. Every other character
// (including line breaks) only separates words.
//
// Returns the number of words that were counted for the first time during this
// call, i.e. the number of distinct new keys whose count went from 0 to 1.
// Returns 0 without touching my_map when fp1 is NULL.
//
// The input is consumed one character at a time and the current word is kept
// in a std::string, so words of any length are handled and a word is never
// split by a read-buffer boundary. Empty tokens between two separators are
// skipped, so they are neither stored in my_map nor included in the result.
int sum()
{
	if (fp1 == NULL)
		return 0;

	int n = 0;
	string word;
	int ch;
	do
	{
		ch = fgetc(fp1);
		const bool is_upper = (ch >= 'A' && ch <= 'Z');
		const bool is_lower = (ch >= 'a' && ch <= 'z');
		if (is_upper || is_lower || ch == '-')
		{
			if (is_upper)
				ch += 'a' - 'A';
			word += static_cast<char>(ch);
		}
		else if (!word.empty())
		{
			// A separator (or EOF) ends the current word: count it.
			if (++my_map[word] == 1)
				n++;
			word.clear();
		}
	} while (ch != EOF);

	fclose(fp1);
	// Guard against a second call reading from or closing a dead stream.
	fp1 = NULL;
	return n;
}
// "paixu" is pinyin for "sort". Passes the global my_map and v_result to
// sortMapbyValue, which copies the map entries into v_result and orders them
// by count. Takes no arguments and returns nothing; it works on the globals only.
void paixu()
{
	sortMapbyValue(my_map, v_result);
}
// Writes the report to d:\4.txt.
//
// n : the total printed on the first line after the label "total".
//
// After the header and an empty line, every entry of the global v_result whose
// word is not empty is written on its own line: the word and its count, each
// left-aligned in a column at least 10 characters wide (setw is a minimum, so
// longer words are not truncated). The file is opened in binary mode and the
// line ends are written explicitly as "\r\n".
//
// If the file cannot be opened a message is printed to stderr and nothing is
// written.
void out(int n)
{
	ofstream outfile("d:\\4.txt", ios::binary);
	if (!outfile)
	{
		cerr << "cannot open d:\\4.txt for writing" << '\n';
		return;
	}

	outfile << "total" << "  " << n << "\r\n" << "\r\n";
	// The alignment flag must be set on the stream that receives the columns.
	outfile << left;
	for (vector<pair<string, int> >::size_type i = 0; i < v_result.size(); ++i)
	{
		if (v_result[i].first.empty())
			continue;
		outfile << setw(10) << v_result[i].first << setw(10) << v_result[i].second << "\r\n";
	}
	outfile.close();
}
// Entry point: opens d:\2.txt, counts its words with sum(), orders the result
// with paixu() and writes the report with out().
// Returns 0 on success and 1 when the input file cannot be opened.
int main()
{
	fp1 = fopen("d:\\2.txt", "r");
	if (fp1 == NULL)
	{
		// Report the reason and stop before sum() reads from a null stream.
		perror("d:\\2.txt");
		return 1;
	}
	int n = sum();
	paixu();
	out(n);
	return 0;
}

测试例子:战争与和平 

结果:

 

posted @ 2016-09-28 15:15  王森123  阅读(130)  评论(1)  编辑  收藏  举报