“并行程序VS串行程序”——并行程序优化实录


在多核处理器、超级计算机日益普及的今天,程序员们怎能对并行程序“袖手旁观”呢?

为了练手,我用MPI写了一个并行排序程序,

先介绍下我的第一个版本,大概的思路是:

使用MPI在各个进程之间进行通信,

1. 进程0生成随机数,并且将数据分段,把各段数据分配给其他进程

2. 其他进程收到数据段,使用冒泡排序对其进行排序,然后发送回进程0

3. 进程0收到这些已排好序的数据段,通过多路归并按顺序整合起来。

下面是这个版本的代码:

View Code
// MPI parallel sort demo (first version: bubble sort on the workers, linear-scan merge on rank 0)
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define N 30

// Entry point of the first version of the MPI sort demo.
//
// Rank 0 fills dataArr with N random values in [0, 999], hands one contiguous
// slice to every other rank, receives the sorted slices into dataArrB and
// merges them back into dataArr. Every other rank receives its slice,
// bubble-sorts it and sends it back to rank 0.
//
// NOTE(review): the send to rank i and the receive from rank i sit in the same
// loop iteration, so rank 0 waits for rank i's result before it sends anything
// to rank i+1; the workers therefore sort one after another.
// NOTE(review): when started with a single process the distribution loop never
// runs, and the merge below reads pointer[1] without it ever being written.
int main(int argc, char** argv)
{

// NOTE(review): `data` is declared but never used in this function.
int processRank, processNum, t, data, num;
// Rank 0: unsorted input, later the merged result. Other ranks: the received slice.
int dataArr[N];
// Rank 0 only: the sorted slices, stored at the same offsets they were cut from.
int dataArrB[N];
// pointer[r]: index in dataArrB of the next unmerged element of rank r's slice.
int pointer[100];
// secEnd[r]: index one past the last element of rank r's slice in dataArrB.
int secEnd[100];

MPI_Status mpistat;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &processNum);
MPI_Comm_rank(MPI_COMM_WORLD, &processRank);


printf("Yes, Sir!\nFrom process %i of %i\n", processRank, processNum);
if(processRank == 0)
{
srand(time(NULL));

// Generate the input and echo it.
for (int i = 0; i < N; i++){
dataArr[i] = rand()%1000;
}
printf("Original Array:\n");
for (int i = 0; i< N; i++){
printf("%d ", dataArr[i]);
}
printf("\n");
puts("Distribute data to processes");
for (int i = 1; i < processNum; i++){
// Every rank gets N/(processNum-1) elements; the last rank takes whatever is left.
num = (N/(processNum-1));
if (i == processNum -1)
num = N - num * (processNum -2);
/// distribute data to each process: first the element count, then the slice itself
printf("Sending to process %d...\n", i);
MPI_Send(&num, 1, MPI_INT, i, 55, MPI_COMM_WORLD);
MPI_Send(&dataArr[(N/(processNum-1)) * (i-1)], num, MPI_INT, i, 55, MPI_COMM_WORLD);
/// gather the sorted data (received inside the same iteration as the send above)
printf("Receiving from process %d...\n", i);
MPI_Recv(&dataArrB[(N/(processNum-1)) * (i-1)], num, MPI_INT, i, 55, MPI_COMM_WORLD, &mpistat);
/// prepare for merge: record where this rank's slice starts and ends
pointer[i] = (N/(processNum-1)) * (i-1);
secEnd[i] = pointer[i] + N/(processNum-1);
if (i == processNum-1 ) secEnd[i] = N;
}
printf("Sorted Sections Array:\n");
for (int i = 0; i< N; i++){
printf("%d ", dataArrB[i]);
}
puts("");
/// merge the sorted sections
puts("Merging...");
for (int i = 0; i < N; i++){
// Scan the head of every slice that still has elements and pick the smallest.
int tMin = 1;
// 10000 is larger than any generated value (values are rand()%1000).
int min = 10000;
for (t = 1; t < processNum; t++){
if (pointer[t] < secEnd[t] && dataArrB[pointer[t]] < min){
min = dataArrB[pointer[t]];
tMin = t;
}
}
// Emit the chosen head and advance that slice's cursor.
dataArr[i] = dataArrB[pointer[tMin]];
pointer[tMin]++;
}
/// output the results
printf("Final Sorted Array:\n");
for (int i = 0; i< N; i++){
printf("%d ", dataArr[i]);
}
printf("\n");
}
else
{
// receive the section: first the element count, then the elements
MPI_Recv(&num, 1, MPI_INT, 0, 55, MPI_COMM_WORLD, &mpistat);
MPI_Recv(&dataArr[0], num, MPI_INT, 0, 55, MPI_COMM_WORLD, &mpistat);
printf("Received Original Array:\n");
for (int i = 0; i< num; i++){
printf("%d ", dataArr[i]);
}
printf("\n");
// sort this section with bubble sort: each outer pass moves the smallest
// element of dataArr[i..num-1] down to index i
for (int i = 0; i < num -1; i++)
for (int j = num-1; j>=i+1; j--)
if (dataArr[j] < dataArr[j-1]){
int tmp = dataArr[j];
dataArr[j]= dataArr[j-1];
dataArr[j-1] = tmp;
}
// return the sorted section to rank 0
MPI_Send(&dataArr[0], num, MPI_INT, 0, 55, MPI_COMM_WORLD);
/// display
printf("My Sorted Section:\n");
for (int i = 0; i< num; i++){
printf("%d ", dataArr[i]);
}
printf("\n");
}
MPI_Finalize();
return 0;
}

自己写出之后当然高兴,不过程序经过高手检查之后,提出了一些问题。

最要命的是这个

// Excerpt from rank 0 in the first version: one loop both hands out a slice
// and collects the sorted result for the same rank.
// Because the receive from rank i is in the same iteration as the send to
// rank i, rank 0 does not send anything to rank i+1 until rank i has answered.
for (int i = 1; i < processNum; i++){
// Every rank gets N/(processNum-1) elements; the last rank takes whatever is left.
num = (N/(processNum-1));
if (i == processNum -1)
num = N - num * (processNum -2);
/// distribute data to each process: first the element count, then the slice itself
printf("Sending to process %d...\n", i);
MPI_Send(&num, 1, MPI_INT, i, 55, MPI_COMM_WORLD);
MPI_Send(&dataArr[(N/(processNum-1)) * (i-1)], num, MPI_INT, i, 55, MPI_COMM_WORLD);
/// gather the sorted data (this is the call that serialises the workers)
printf("Receiving from process %d...\n", i);
MPI_Recv(&dataArrB[(N/(processNum-1)) * (i-1)], num, MPI_INT, i, 55, MPI_COMM_WORLD, &mpistat);
/// prepare for merge: record where this rank's slice starts and ends
pointer[i] = (N/(processNum-1)) * (i-1);
secEnd[i] = pointer[i] + N/(processNum-1);
if (i == processNum-1 ) secEnd[i] = N;
}

这段程序彻底抹杀掉了我这个并行程序的光辉形象,因为这段煞有介事的并行程序,其实是一段串行程序。

屏幕前的高手应该看出来了吧,同一段数据的发送和接收,都放在了同一次循环迭代中。

也就意味着,不同段之间的收发是一个接着一个的:MPI_Recv是阻塞调用,进程0必须等进程i排好序并把结果发回来之后,才会给进程i+1发送数据。也就意味着,其他每个进程各自的排序也是一个接着一个进行的,并不会如我初衷那样并行排序。

想来,这段错误应该是并行程序小白们常犯的错误,所以我也很乐于把我做过的蠢事发出来给大家分享。前车之鉴,警钟长鸣lol

改正之后的这段程序是这样的,

for (int i = 1; i < processNum; i++){
num = (N/(processNum-1));
if (i == processNum -1)
num = N - num * (processNum -2);
///distribute data to each process
printf("Sending to process %d...\n", i);
MPI_Send(&num, 1, MPI_INT, i, 55, MPI_COMM_WORLD);
MPI_Send(&dataArr[(N/(processNum-1)) * (i-1)], num, MPI_INT, i, 55, MPI_COMM_WORLD);
}
for (int i = 1; i < processNum; i++){
num = (N/(processNum-1));
if (i == processNum -1)
num = N - num * (processNum -2);
///gather the sorted data
printf("Receiving from process %d...\n", i);
MPI_Recv(&dataArrB[(N/(processNum-1)) * (i-1)], num, MPI_INT, i, 55, MPI_COMM_WORLD, &mpistat);
///prepare for merge, set the pointers
pointer[i] = (N/(processNum-1)) * (i-1);
secEnd[i] = pointer[i] + N/(processNum-1);
if (i == processNum-1 ) secEnd[i] = N;
}

 

同时程序的效率还可以提升,比如说把其他进程的排序算法换成快排,把进程0的归并换成借助有序容器的多路归并。

最后奉上优化后的版本,

View Code
// MPI parallel sort demo (optimized version: qsort on the workers, ordered-container merge on rank 0)
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h> // 'qsort' is in it.
#include <time.h>
#include <map>
#define N 30

/**
 * Comparator for qsort() that orders ints ascending.
 *
 * @param p1 pointer to the first int
 * @param p2 pointer to the second int
 * @return a negative value if *p1 < *p2, zero if they are equal,
 *         a positive value if *p1 > *p2
 */
int QuickSortCompareFun(const void *p1, const void *p2)
{
    const int lhs = *static_cast<const int*>(p1);
    const int rhs = *static_cast<const int*>(p2);
    // Compare instead of subtracting: lhs - rhs overflows (undefined behaviour,
    // and in practice the wrong sign) when the operands are far apart.
    return (lhs > rhs) - (lhs < rhs);
}

int main(int argc, char** argv)
{

int processRank, processNum, t, data, num;
int dataArr[N];
int dataArrB[N];
int pointer[100];
int secEnd[100];

MPI_Status mpistat;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &processNum);
MPI_Comm_rank(MPI_COMM_WORLD, &processRank);


printf("Yes, Sir!\nFrom process %i of %i\n", processRank, processNum);
if(processRank == 0)
{
srand(time(NULL));

for (int i = 0; i < N; i++){
dataArr[i] = rand()%1000;
}
printf("Original Array:\n");
for (int i = 0; i< N; i++){
printf("%d ", dataArr[i]);
}
printf("\n");
puts("Distribute data to processes");
for (int i = 1; i < processNum; i++){
num = (N/(processNum-1));
if (i == processNum -1)
num = N - num * (processNum -2);
///distribute data to each process
printf("Sending to process %d...\n", i);
MPI_Send(&num, 1, MPI_INT, i, 55, MPI_COMM_WORLD);
MPI_Send(&dataArr[(N/(processNum-1)) * (i-1)], num, MPI_INT, i, 55, MPI_COMM_WORLD);
}
for (int i = 1; i < processNum; i++){
num = (N/(processNum-1));
if (i == processNum -1)
num = N - num * (processNum -2);
///gather the sorted data
printf("Receiving from process %d...\n", i);
MPI_Recv(&dataArrB[(N/(processNum-1)) * (i-1)], num, MPI_INT, i, 55, MPI_COMM_WORLD, &mpistat);
///prepare for merge, set the pointers
pointer[i] = (N/(processNum-1)) * (i-1);
secEnd[i] = pointer[i] + N/(processNum-1);
if (i == processNum-1 ) secEnd[i] = N;
}
printf("Sorted Sections Array:\n");
for (int i = 0; i< N; i++){
printf("%d ", dataArrB[i]);
}
puts("");
///merge the sorted sections
puts("Merging...");
std::map<int, int> data2rank;
for (t = 1; t < processNum; t++){
if (pointer[t] < secEnd[t]){
data2rank.insert(std::make_pair<int, int>(dataArrB[pointer[t]], t));
pointer[t]++;
}
}
for (int i = 0; i < N; i++){
int data = data2rank.begin()->first;
int rank = data2rank.begin()->second;
dataArr[i] = data;
data2rank.erase(data2rank.begin());
if (pointer[rank] < secEnd[rank])
{
data2rank.insert(std::make_pair<int, int>(dataArrB[pointer[rank]], rank));
pointer[rank]++;
}
}
///output the results
printf("Final Sorted Array:\n");
for (int i = 0; i< N; i++){
printf("%d ", dataArr[i]);
}
printf("\n");
}
else
{
//receieve the section
MPI_Recv(&num, 1, MPI_INT, 0, 55, MPI_COMM_WORLD, &mpistat);
MPI_Recv(&dataArr[0], num, MPI_INT, 0, 55, MPI_COMM_WORLD, &mpistat);
printf("Received Original Array:\n");
for (int i = 0; i< num; i++){
printf("%d ", dataArr[i]);
}
printf("\n");
//sort this section
qsort(dataArr, num, sizeof(int), QuickSortCompareFun);

MPI_Send(&dataArr[0], num, MPI_INT, 0, 55, MPI_COMM_WORLD);
///display
printf("My Sorted Section:\n");
for (int i = 0; i< num; i++){
printf("%d ", dataArr[i]);
}
printf("\n");
}
MPI_Finalize();
return 0;
}

 

希望我的这段并行程序初探对您有帮助:-)

posted @ 2011-11-16 22:22  Rosting  阅读(4650)  评论(4)  编辑  收藏  举报