OpenMP的效果
今天简单地把#pragma omp parallel for private(j)语句加在一个练习时做的求解Laplace方程的有限差分程序上,4万个点的网格上,在P4超线程的CPU上,使得365秒减少到314秒,50秒的差距呀。
ps:然而在使用SOR方法时,4万个点的网格,单线程只用了14秒,
,以后有时间再研究一下高斯赛德尔的SOR多线程版本,看有什么改进。
附带程序:
1
#include <omp.h>
2
#include <iostream>
3
#include <fstream>
4
#include <cmath>
5
#include <iomanip>
6
using namespace std;
7
//迭代终止误差
8
const double eps=1e-7;
9
//网格划分
10
const int ndivx=200;
11
const int ndivy=200;
12
const int nNx=ndivx+1;
13
const int nNy=ndivy+1;
14
const double deltax=1.0/ndivx;
15
const double deltay=1.0/ndivy;
16
//常数
17
const double PI=3.14159265;
18
const double omiga=1.902;//松弛因子
19
20
21
22
23
int main(void)
24
{
25
// omp_set_num_threads(2);
26
Timer tm;
27
tm.start();
28
29
int j,i;
30
double error=0;
31
double** uold=new double*[nNy];
32
double** unew=new double*[nNy];
33
for ( i=0;i<nNx;i++)
34
{
35
uold[i]=new double [nNx];
36
unew[i]=new double [nNx];
37
for ( j=0;j<nNx;j++)
38
{
39
uold[i][j]=unew[i][j]=0;
40
}
41
}
42
43
//边界条件
44
for ( i=0;i<nNx;i++)
45
{
46
uold[0][i]=unew[0][i]=10+cos(i*PI*deltax);
47
uold[nNy-1][i]=unew[nNy-1][i]=10+cos(i*PI*deltax);
48
}
49
50
for ( i=0;i<nNy;i++)
51
{
52
uold[i][0]=unew[i][0]=10+cos(i*PI*deltay);
53
uold[i][nNx-1]=unew[i][nNx-1]=10+cos(i*PI*deltay);
54
}
55
56
int time=0;
57
double ukn=-1+2*(1.0/(deltax*deltax)+1.0/(deltay*deltay));
58
do{
59
60
#pragma omp parallel for private(j)
61
for ( i=1;i<nNy-1;i++)
62
{
63
for ( j=1;j<nNx-1;j++)
64
{
65
//jacobi
66
unew[i][j]=1.0/ukn*( (1.0/(deltay*deltay)+0.5/deltay*sin(2*PI*i*deltay))*uold[i+1][j]
67
+(1.0/(deltay*deltay)-0.5/deltay*sin(2*PI*i*deltay))*uold[i-1][j]
68
+(1.0/(deltax*deltax)+0.5/deltax*sin(2*PI*j*deltay))*uold[i][j+1]
69
+(1.0/(deltax*deltax)-0.5/deltax*sin(2*PI*j*deltay))*uold[i][j-1] );
70
//sor
71
/* unew[i][j]=(1-omiga)*uold[i][j]
72
+omiga/ukn*( (1.0/(deltay*deltay)+0.5/deltay*sin(2*PI*i*deltay))*uold[i+1][j]
73
+(1.0/(deltay*deltay)-0.5/deltay*sin(2*PI*i*deltay))*unew[i-1][j]
74
+(1.0/(deltax*deltax)+0.5/deltax*sin(2*PI*j*deltay))*uold[i][j+1]
75
+(1.0/(deltax*deltax)-0.5/deltax*sin(2*PI*j*deltay))*unew[i][j-1] );*/
76
}
77
}
78
double sum=0;
79
for ( i=1;i<nNy-1;i++)
80
{
81
for ( j=1;j<nNx-1;j++)
82
{
83
sum+=(uold[i][j]-unew[i][j])*(uold[i][j]-unew[i][j]);
84
}
85
}
86
error=sqrt(sum);
87
88
for ( i=0;i<nNy;i++)
89
{
90
for ( j=0;j<nNx;j++)
91
{
92
uold[i][j]=unew[i][j];
93
}
94
}
95
// if(time%100==0)
96
// cout<<++time<<"\t\t"<<error<<endl;
97
if (time>=100000)
98
{
99
break;
100
}
101
}while(error>eps);
102
103
ofstream tecplot("tec.plt");
104
tecplot<<"zone i="<<nNy<<",j="<<nNx<<",f=point\n";
105
for ( i=0;i<nNy;i++)
106
{
107
for ( j=0;j<nNx;j++)
108
{
109
tecplot << setw(15) << i*deltay
110
<< setw(15) << j*deltax
111
<< setw(15) << uold[i][j]<<"\n";
112
}
113
}
114
tecplot.close();
115
for ( i=0;i<nNy;i++)
116
{
117
delete[] uold[i];
118
delete[] unew[i];
119
}
120
delete[] uold;
121
delete[] unew;
122
tm.stop();
123
tm.diplayelapsedtime();
124
return 0;
125
}
#include <omp.h>2
#include <iostream>3
#include <fstream>4
#include <cmath>5
#include <iomanip>6
using namespace std;7
//迭代终止误差8
const double eps=1e-7;9
//网格划分10
const int ndivx=200;11
const int ndivy=200;12
const int nNx=ndivx+1;13
const int nNy=ndivy+1;14
const double deltax=1.0/ndivx;15
const double deltay=1.0/ndivy;16
//常数17
const double PI=3.14159265;18
const double omiga=1.902;//松弛因子19

20

21

22

23
int main(void)24
{25
// omp_set_num_threads(2);26
Timer tm;27
tm.start();28

29
int j,i;30
double error=0;31
double** uold=new double*[nNy];32
double** unew=new double*[nNy];33
for ( i=0;i<nNx;i++)34
{35
uold[i]=new double [nNx];36
unew[i]=new double [nNx];37
for ( j=0;j<nNx;j++)38
{39
uold[i][j]=unew[i][j]=0;40
}41
}42

43
//边界条件 44
for ( i=0;i<nNx;i++)45
{46
uold[0][i]=unew[0][i]=10+cos(i*PI*deltax);47
uold[nNy-1][i]=unew[nNy-1][i]=10+cos(i*PI*deltax);48
}49

50
for ( i=0;i<nNy;i++)51
{52
uold[i][0]=unew[i][0]=10+cos(i*PI*deltay);53
uold[i][nNx-1]=unew[i][nNx-1]=10+cos(i*PI*deltay);54
}55

56
int time=0;57
double ukn=-1+2*(1.0/(deltax*deltax)+1.0/(deltay*deltay));58
do{59

60
#pragma omp parallel for private(j)61
for ( i=1;i<nNy-1;i++)62
{63
for ( j=1;j<nNx-1;j++)64
{65
//jacobi66
unew[i][j]=1.0/ukn*( (1.0/(deltay*deltay)+0.5/deltay*sin(2*PI*i*deltay))*uold[i+1][j]67
+(1.0/(deltay*deltay)-0.5/deltay*sin(2*PI*i*deltay))*uold[i-1][j]68
+(1.0/(deltax*deltax)+0.5/deltax*sin(2*PI*j*deltay))*uold[i][j+1]69
+(1.0/(deltax*deltax)-0.5/deltax*sin(2*PI*j*deltay))*uold[i][j-1] );70
//sor71
/* unew[i][j]=(1-omiga)*uold[i][j]72
+omiga/ukn*( (1.0/(deltay*deltay)+0.5/deltay*sin(2*PI*i*deltay))*uold[i+1][j]73
+(1.0/(deltay*deltay)-0.5/deltay*sin(2*PI*i*deltay))*unew[i-1][j]74
+(1.0/(deltax*deltax)+0.5/deltax*sin(2*PI*j*deltay))*uold[i][j+1]75
+(1.0/(deltax*deltax)-0.5/deltax*sin(2*PI*j*deltay))*unew[i][j-1] );*/76
}77
}78
double sum=0;79
for ( i=1;i<nNy-1;i++)80
{81
for ( j=1;j<nNx-1;j++)82
{83
sum+=(uold[i][j]-unew[i][j])*(uold[i][j]-unew[i][j]);84
}85
}86
error=sqrt(sum);87
88
for ( i=0;i<nNy;i++)89
{90
for ( j=0;j<nNx;j++)91
{92
uold[i][j]=unew[i][j];93
}94
}95
// if(time%100==0)96
// cout<<++time<<"\t\t"<<error<<endl;97
if (time>=100000)98
{99
break;100
}101
}while(error>eps); 102

103
ofstream tecplot("tec.plt");104
tecplot<<"zone i="<<nNy<<",j="<<nNx<<",f=point\n";105
for ( i=0;i<nNy;i++)106
{107
for ( j=0;j<nNx;j++)108
{109
tecplot << setw(15) << i*deltay110
<< setw(15) << j*deltax111
<< setw(15) << uold[i][j]<<"\n";112
}113
}114
tecplot.close();115
for ( i=0;i<nNy;i++)116
{117
delete[] uold[i];118
delete[] unew[i];119
}120
delete[] uold;121
delete[] unew;122
tm.stop();123
tm.diplayelapsedtime();124
return 0;125
}



浙公网安备 33010602011771号