几种排序的比较- bitmapsort,qsort,set

今天看了编程珠玑column1,其中Problem 1是假设内存足够大,如何用库函数实现对100w个数的排序,而Problem2是让实现bit vector操作(bset,clear, test),Problem3让实现bitmap sort 并和Problem1的sort做比较。

先来讲一下bit vector,它其实是数据压缩的一种方式。思想是利用每一个bit代表一个数,1个int有32bit,故一个int可以表示32个数。举个例子说,现在有一个int a,那么a的低位到高位分别代表0~31(每一位是1或0,1表示有该数,0表示没有);假如现在有集合{1,2,5,8},则a的二进制为(高位)00000000 00000000 00000001 00100110(低位)。bit vector的操作有bset(i)、clear(i)、test(i),分别是对数i在整数数组中的设置、清除和检查。

现在假设最多有10000000个数,那么则需要1+10000000/32 个int来存储这10000000个数。

对于每个数i,设置操作是将第i个bit设为1;第i个bit可以看成是第(i/32)个int里边的第(i%32)位。已知整数右移一位相当于除以2,于是i/32可表示为i>>5,而i%32可表示为i与低五位掩码按位与,即i & 0x1F。于是设置操作为a[i>>5] |= 1<<(i & 0x1F);clear(i)操作则是a[i>>5] &= ~(1<<(i & 0x1F));test(i)操作为a[i>>5] & (1<<(i & 0x1F))。用MASK表示0x1F,BITSPERWORD表示32,SHIFT表示5。

代码如下:

 1 #define BITSPERWORD 32
 2 #define SHIFT 5
 3 #define MASK 0x1F
 4 #define N 10000000
 5 int a[1+N/BITSPERWORD];
 6 void bset(int i)
 7 {    a[i>>SHIFT] |= 1<<(i & MASK);}
 8 
 9 void clr(int i)
10 {    a[i>>SHIFT] &= ~(1<<(i & MASK));}
11 
12 int test(int i)
13 {    return a[i>>SHIFT] & (1<<(i & MASK));}

 

bitmap sort的思想很简单,首先将N个数clear(即设为0),然后对读取的每一个数在数组上设为1,输出的时候将含1的数(test(i))从小到大输出即可。下面给出的是含对文件的操作。不需要对文件操作可自行修改。

code:

 1 void bitSort()
 2 {
 3     int i;
 4     for (i =0; i < N; ++i)
 5     {    
 6         clr(i);
 7     }
 8     FILE *fp1 = NULL;//read file
 9     FILE *fp2 = NULL;//write another file
10 
11     if(NULL == (fp1 = fopen("data.txt","r")))
12         throw ("open file failed!");
13     /*while (scanf("%d",&i) != EOF)
14     {
15         bset(i);
16     }*/
17     //文件读取改写:
18     while (fscanf(fp1,"%d",&i)!=EOF)
19     {
20         bset(i);
21     }
22     fclose(fp1);
23     fp1 = NULL;
24     fp2 = fopen("bitsortData.txt","w+");
25     for (i = 0; i < N; ++i)
26     {
27         if (test(i))
28         {
29             fprintf(fp2,"%d\n",i);
30         }
31     }
32     fclose(fp2);
33     fp2 = NULL;
34     printf("Bitsort complete!\n");
35 }

 

为了测试该函数,还需要生成1000000个互不相等的整数。下面给出该函数

 1 //生成k个整数,范围0~N,N为上面定义的,并写到文件data.txt
 2 void produceRand(int k)
 3 {
 4     int *buf = (int*)malloc(N * sizeof(int));
 5     if(NULL == buf)return;
 6     FILE *fp = NULL;
 7     fp = fopen("data.txt","w+");
 8     if (fp == NULL)
 9     {
10         throw ("can't open file!");
11     }
12     int i;    
13     for (i = 0; i < N; ++i)
14     {
15         buf[i] = i;
16     }
17     for (i = 0; i < k; ++i)
18     {
19         //生成i~n-1之间的随机数
20         int randi = i + (int)(rand() / (RAND_MAX + 1.0) * (N-1-i));
21         //swap(i, randi)
22         int tmp = buf[i];        
23         buf[i] = buf[randi];
24         buf[randi] = tmp;
25         fprintf(fp,"%d\n",buf[i]);
26     }
27     printf("Write file complete\n");
28     fclose(fp);
29     fp = NULL;
30     free(buf);
31     buf = NULL;
32 }


使用qsort对文件data.txt中的数排序:

 1 //比较函数
 2 int intcomp(const void *x, const void *y)
 3 {    return *(int*)x - *(int*)y;}
 4 
 5 void myQsort()
 6 {
 7     //申请空间,a[1000000]
 8     int *a = (int*)malloc(1000000 * sizeof(int));
 9     if (a == NULL) return;
10     int i,n;
11     FILE *fp1 = NULL;//read file
12     FILE *fp2 = NULL;//write another file
13 
14     if(NULL == (fp1 = fopen("data.txt","r")))
15         throw ("open file failed!");
16     n = 0;
17     //读入数据
18     while (fscanf(fp1,"%d",&a[n])!=EOF)
19     {
20         n++;
21     }
22     fclose(fp1);
23     fp1 = NULL;
24     //使用qsort排序
25     qsort(a, n, sizeof(int), intcomp);
26     //将结果写入文件
27     fp2 = fopen("qsortData.txt","w+");
28     for (i = 0; i < n; ++i)
29     {
30         fprintf(fp2,"%d\n",a[i]);
31     }
32     fclose(fp2);
33     fp2 = NULL;
34     printf("Qsort complete!\n");
35     free(a);
36     a = NULL;
37 }


使用C++ STL中的set对文件data.txt中的数排序:

 1 void setSort()
 2 {    
 3     set<int> S;
 4     int i;
 5     set<int>:: iterator j;
 6     FILE *fp1 = NULL;//read file
 7     FILE *fp2 = NULL;//write another file
 8 
 9     if(NULL == (fp1 = fopen("data.txt","r")))
10         throw ("open file failed!");
11     //插入数据
12     while (fscanf(fp1,"%d",&i)!=EOF)
13     {
14         S.insert(i);
15     }
16     fclose(fp1);
17     fp1 = NULL;
18     //将结果写入文件
19     fp2 = fopen("setsortData.txt","w+");
20     for (j = S.begin(); j != S.end(); ++j)
21     {
22         fprintf(fp2,"%d\n", *j);
23     }
24     fclose(fp2);
25     fp2 = NULL;
26     printf("Setsort complete!\n");
27 }


主函数为:

 1 #include <stdio.h>
 2 #include <stdlib.h>
 3 #include <set>
 4 #include <time.h>
 5 using namespace std;
 6 int main()
 7 {    
 8 //    produceRand(1000000);
 9     clock_t start,finish;//计时
10     double duration;
11     start = clock();
12     bitSort();
13     finish = clock();
14     duration = (double)(finish - start) / CLOCKS_PER_SEC;
15     printf("bitsort time: %lfs\n", duration);
16 
17     start = clock();
18     myQsort();
19     finish = clock();
20     duration = (double)(finish - start) / CLOCKS_PER_SEC;
21     printf("qsort time: %lfs\n", duration);
22 
23     start = clock();
24     setSort();
25     finish = clock();
26     duration = (double)(finish - start) / CLOCKS_PER_SEC;
27     printf("setsort time: %lfs\n", duration);
28                   return 029 }


运行该程序得到三种排序的比较效果:

可以发现bitmap sort最快,使用set排序最慢。而且bitmap sort使用的空间也少:qsort需要1000000个int来存放待排序的数,而bitmap sort的空间只取决于数的取值范围N,即1+10000000/32(约31万)个int,每个可能的取值只占1个bit。可见bitmap sort的强大和高效!

 

 

posted @ 2013-05-09 21:02  hust_枫  阅读(319)  评论(0编辑  收藏  举报