【整理】在一亿个数中寻找出现频率最多的4个 <转>

本文中所有算法均搜集于网络,在此感谢各位大师的无私奉献。

主函数:

1 const int max = 1000; // 最大取值范围
2 //const int count = 5000; // 小数据量
3   const int count = 100000000; // 数据量
4   const int resultLen = 4; // 返回长度
5  
6 static void Main(string[] args)
7 {
8 var random = new Random(2010); // 固定随机种子,确保大家测试数据一致
9   var data = new int[count];
10 #region 生成测试数据,不在性能计算之内
11 for (var i = 0; i < count; i++) data[i] = random.Next(max);
12 #endregion 生成测试数据
13
14 #region 计算性能
15 Console.WriteLine("Zswang_0 开始运行");
16 var tick = Environment.TickCount;
17 foreach (var i in Zswang_0(data, 4))
18 {
19 Console.WriteLine(i);
20 }
21 Console.WriteLine("共耗时{0}毫秒", Environment.TickCount - tick);
22 #endregion
23
24 Console.ReadKey();
25 }
26  

 

算法一:

1 static int[] Zswang_0(int[] data, int len)
2 {
3 // 计算每个数据出现的次数
4   var dict = new int[max];
5 for (var i = 0; i < count; i++) dict[data[i]]++;
6
7 // 按出现的次数排序
8   var indexs = new int[max];
9 for (var i = 0; i < max; i++) indexs[i] = i; // 获得完整的序号
10 Array.Sort(indexs, delegate(int a, int b)
11 {
12 return dict[b] - dict[a];
13 });
14 /*
15 for (var i = 0; i < 100; i++)
16 {
17 Console.WriteLine("{0}={1}", indexs[i], dict[indexs[i]]);
18 }
19 //*/
20 var result = new int[len];
21 for (var i = 0; i < len; i++) result[i] = indexs[i]; // 输出
22 return result;
23 }
24

运行结果如下:

 

算法二:

1 static int[] ChrisAK_0(int[] data, int len)
2 {
3 // 计算每个数据出现的次数
4 int ncpu = Environment.ProcessorCount;
5 var dict = new int[ncpu][];
6 int part = count / ncpu;
7 if (ncpu > 1)
8 {
9 Thread[] threads = new Thread[ncpu - 1];//每个cpu分配一个线程
10 for (var i = 1; i < ncpu; ++i)
11 {
12 dict[i] = new int[max];//分配线程用的计数器
13 var th = new Thread((baseidx) =>
14 {
15 int bidx = (int)baseidx;
16 int start = bidx * part;
17 int end = start + part;
18 for (var j = bidx * part; j < end; j++)
19 {
20 ++dict[bidx][data[j]];
21 }
22 });
23 th.Start(i);
24 threads[i - 1] = th;
25 }
26 dict[0] = new int[max];//主线程开始计算自己的部分
27 for (var i = 0; i < part; i++)
28 {
29 dict[0][data[i]]++;
30 }
31 for (var i = 1; i < ncpu; ++i)// 等待其它线程结束
32 threads[i - 1].Join();
33 for (var i = 0; i < max; ++i)//合并结果
34 {
35 int sum = 0;
36 for (var j = 1; j < ncpu; ++j)
37 sum += dict[j][i];
38 dict[0][i] += sum;
39 }
40 for (int i = ncpu * part; i < count; ++i)//处理某些CPU内核个数无法整除的情况
41 dict[0][1]++;
42 }
43 else
44 {
45 for (var i = 0; i < part; i++)//单线程的情况.
46 {
47 dict[0][data[i]]++;
48 }
49 }
50 // 按出现的次数排序
51 var indexs = new int[max];
52 for (var i = 0; i < max; i++) indexs[i] = i; // 获得完整的序号
53 Array.Sort(indexs, delegate(int a, int b)
54 {
55 return dict[0][b] - dict[0][a];
56 });
57 for (var i = 0; i < 50; i++)
58 {
59 Console.WriteLine("{0}={1}", indexs[i], dict[0][indexs[i]]);
60 }
61 var result = new int[len];
62 for (var i = 0; i < len; i++) result[i] = indexs[i]; // 输出
63 return result;
64 }
65

运行结果如下:

 

算法三:

1 static int[] Wuyi8808_0(int[] data, int len)
2 {
3 // 计算每个数据出现的次数
4 var dict = new int[max];
5 for (var i = 0; i < count; i++) dict[data[i]]++;
6 // 奇怪,这里如果改用下一行,速度反而更慢:
7 // foreach (var x in data) dict[x]++;
8
9 // 按出现的次数排序
10 var indexs = new int[max];
11 for (var i = 0; i < max; i++) indexs[i] = i; // 获得完整的序号
12 Array.Sort(dict, indexs); // 似乎这样就可以了,不过速度和伴水的是一样的
13 //*
14 for (var i = max - 1; i > max - 51; i--)
15 {
16 Console.WriteLine("{0,4}={1}", indexs[i], dict[i]);
17 }
18 //*/
19 var result = new int[len];
20 for (var i = 0; i < len; i++) result[i] = indexs[max - i - 1]; // 输出
21 return result;
22 }
23

运行结果如下:

 

算法四:

1 static int[] sp1234_0(int[] data, int len)
2 {
3 var dict = new int[max];
4 for (var i = 0; i < count; i++) dict[data[i]]++;
5 var indexs = new int[max];
6 for (var i = 0; i < max; i++) indexs[i] = i;
7 int f = 0;
8 int t = indexs.Length - 1;
9 int m;
10 loop:
11 m = partition(dict, indexs, f, t);
12 if (m == len)
13 {
14 var result = new int[len];
15 for (var i = 0; i < len; i++) result[i] = indexs[i];
16 return result;
17 }
18 else if (m > len)
19 t = m - 1;
20 else
21 f = m + 1;
22 goto loop;
23 }
24
25 static int partition(int[] value, int[] data, int i, int j)
26 {
27 var x = i++;
28 var y = value[data[x]];
29 int m;
30 begin:
31 while (i <= j && value[data[i]] >= y)
32 i++;
33 while (j >= i && value[data[j]] <= y)
34 j--;
35 if (i > j)
36 {
37 m = data[x];
38 data[x] = data[j];
39 data[j] = m;
40 return j;
41 }
42
43 m = data[i];
44 data[i] = data[j];
45 data[j] = m;
46 i++;
47 j--;
48 goto begin;
49 }
50

运行结果如下:

 

算法五:

1 static int[] Wuyi8808_1(int[] data, int len)
2 {
3 int[] dict = new int[max];
4 int[] indexs = new int[max];
5 int[] result = new int[len];
6 unsafe
7 {
8 fixed (int* a = data, b = dict, c = indexs)
9 {
10 for (int i = 0; i < count; i++) b[a[i]]++;
11 for (int i = 0; i < max; i++) c[i] = i;
12 Array.Sort(dict, indexs);
13 for (int i = 0; i < len; i++) result[i] = c[max - i - 1];
14 }
15 }
16 return result;
17 }
18

运行结果如下:

 

算法六:

1 struct set
2 {
3 public int count;
4 public int key;
5 }
6 static int[] LCL_0(int[] data, int len)
7 {
8 // 计算每个数据出现的次数
9 set[] dict = new set[max];
10
11 for (int i = 0; i < count; i++)
12 {
13 dict[data[i]].count++;
14 if (i < max) dict[i].key = i;
15 }
16
17 // 按出现的次数排序
18 Array.Sort(dict, delegate(set a, set b)
19 {
20 return b.count - a.count;
21 });
22
23 int[] result = new int[len];
24 for (int i = 0; i < len; i++) result[i] = dict[i].key; // 输出
25 return result;
26 }
27

运行结果如下:

 

算法七:

1 static int[] Lihanbing_0(int[] data, int len)
2 {
3 // 计算每个数据出现的次数
4 int[] dict = new int[max];
5 for (int i = 0; i < count; i++) dict[data[i]]++;
6
7 // 按出现的次数排序
8 int[] result = new int[len], resultcount = new int[len];
9 int tempResult = 0, tempResult2 = 0;
10 int tempCount = 0, tempCount2 = 0;
11 for (int i = 0; i < max; i++)
12 {
13 tempCount = dict[i];
14 tempResult = i;
15 for (int j = 0; j < len; j++)
16 {
17 if (tempCount <= resultcount[j])
18 continue;
19 tempCount2 = resultcount[j];
20 tempResult2 = result[j];
21 resultcount[j] = tempCount;
22 result[j] = tempResult;
23 tempCount = tempCount2;
24 tempResult = tempResult2;
25
26 }
27 }
28 return result;
29 }
30

运结结果如下:

posted @ 2010-05-17 23:49  温景良(Jason)  Views(551)  Comments(1Edit  收藏  举报