openmp_demos

分享8个openmp的小demo

demo1

 #include <stdio.h>
 #include <omp.h>

 // Demo 1: print the processor count, then have every thread of a parallel
 // region print its own thread number.
 int main(void)
 {
     // Number of processors reported by the OpenMP runtime.
     const int processorCount = omp_get_num_procs();
     printf(" Core Num is %d \n", processorCount);

     // The block below is executed once by each thread of the team.
     #pragma omp parallel
     {
         // Thread number of the calling thread within the team.
         const int threadId = omp_get_thread_num();
         printf("ID: %d Hello, world.\n", threadId);
     }

     return 0;
 }

并行执行,会将{}里面的程序执行4(线程数)遍

 demo2

 #include <stdio.h>
 #include <omp.h>

 // Demo 2: the same work-sharing loop written two ways -- the combined
 // "parallel for" directive, and a "for" directive nested inside a "parallel"
 // region.
 int main(int argc, char **argv)
 {
     // Only kElementCount elements are ever written, so the array is sized to
     // the loop bound. The earlier 1,000,000-element automatic array (about
     // 4 MB with 4-byte int) could overflow a small default stack.
     enum { kElementCount = 100 };
     int a[kElementCount];

     // Combined form: one directive creates the team and splits the iterations.
     #pragma omp parallel for
     for (int i = 0; i < kElementCount; i++) {
         a[i] = 2 * i;
         printf("%d---", a[i]);
     }
     printf("-----------------------------");

     // Split form: the region creates the team; the inner "for" directive
     // divides the iterations among the threads of that team.
     #pragma omp parallel
     {
         #pragma omp for
         for (int i = 0; i < 5; i++)
             printf("i = %d\n", i);
     }

     // The unreachable duplicate "return 0;" was removed.
     return 0;
 }

两种循环写法(合并的 `parallel for`,以及在 `parallel` 区域内嵌套 `for`)执行出来的结果有差异:输出顺序取决于线程调度,每次运行都可能不同。

 demo3

 #include<stdio.h>
 #include<stdlib.h>
 #include<omp.h>
 #include <unistd.h>

 // Demo 3: a "sections" construct outside any parallel region, followed by a
 // "parallel sections" construct. Every section prints the number of the
 // thread that runs it.
 int main()
 {
     printf("parent threadid:%d\n", omp_get_thread_num());

     // No enclosing parallel region here: the sections are executed from top
     // to bottom by the single calling thread.
     #pragma omp sections
     {
         #pragma omp section
         {
             printf("section 0,threadid=%d\n", omp_get_thread_num());
             sleep(1);
         }

         #pragma omp section
         {
             printf("section 1,threadid=%d\n", omp_get_thread_num());
             // sleep(1);
         }

         #pragma omp section
         {
             printf("section 2,threadid=%d\n", omp_get_thread_num());
             sleep(1);
         }
     }

     // Combined "parallel sections": the sections are handed out to the threads
     // of a team, so their output order is not fixed.
     #pragma omp parallel sections
     {
         #pragma omp section
         {
             printf("section 3,threadid=%d\n", omp_get_thread_num());
             sleep(1);
         }

         #pragma omp section
         {
             printf("section 4,threadid=%d\n", omp_get_thread_num());
             sleep(1);
         }

         #pragma omp section
         {
             printf("section 5,threadid=%d\n", omp_get_thread_num());
             sleep(1);
         }
     }

     return 0;
 }

 多个sections按顺序执行,sections有很多section,如果并行执行则多个线程一起工作,顺序会乱;如果不是并行执行则从上往下执行,只有一个线程工作

demo4

 #include <omp.h>
 #include <stdio.h>

 // Demo 4: two work-sharing loops with a master-only print step and an
 // explicit barrier in between.
 int main( )
 {
     int a[5];

     #pragma omp parallel
     {
         // Perform some computation.
         #pragma omp for
         for (int idx = 0; idx < 5; idx++) {
             a[idx] = idx * idx;
             printf(" A ---- Thread %d !!!\n", omp_get_thread_num());
         }

         // Print intermediate results (master thread only).
         #pragma omp master
         for (int idx = 0; idx < 5; idx++) {
             printf("a[%d] = %d ", idx, a[idx]);
             printf(" ---- Thread %d !!!\n", omp_get_thread_num());
         }

         // Wait. Threads that get here first stop until every thread of the
         // team has arrived, and only then continue. Try commenting the
         // barrier out to see the effect: the execution order gets scrambled.
         #pragma omp barrier

         // Continue with the computation.
         #pragma omp for
         for (int idx = 0; idx < 5; idx++) {
             a[idx] += idx;
             printf(" B---- Thread %d !!!\n", omp_get_thread_num());
         }
     }
 }

 去掉 `#pragma omp barrier` 后

barrier 应该是配合 master 使用的(master 结束处没有隐式 barrier);如果里面全是 for 循环,由于每个 `omp for` 结束处自带隐式 barrier,各个 # 指令块会按照从上往下的顺序执行,只是 # 里面的循环迭代是乱序的。

demo5

 #include <omp.h>
 #include <stdio.h>
 #include <time.h>
 #include <iostream>

 static long num_steps = 28;  // number of integration steps
 double step;                 // width of one step; assigned in main()
 double x;  // x has to be a global variable; a plain local is rejected in threadprivate
 #define NUM_THREADS 4
 #pragma omp threadprivate(x)

 // Demo 5: approximate pi by summing 4/(1+x*x) at the midpoint of each step,
 // using a parallel loop with a "+" reduction on sum and a threadprivate x.
 int main ()
 {
     int i;
     double pi, sum = 0.0;
     step = 1.0 / static_cast<double>(num_steps);
     omp_set_num_threads(NUM_THREADS);  // request NUM_THREADS threads

     // Elapsed wall-clock time. clock() was used before, but it reports
     // processor time for the whole process (all threads added together), which
     // does not show how long a parallel loop actually took.
     const double begin_time = omp_get_wtime();

     #pragma omp parallel for reduction(+:sum)
     for (i = 0; i < num_steps; i++) {
         // Printed before x is updated, so x is the value left by this thread's
         // previous iteration; sum is this thread's partial sum so far.
         printf("i: %d --- x: %f -- sum: %f---- Thread %d !!!\n", i, x, sum, omp_get_thread_num());
         x = (i + 0.5) * step;
         sum = sum + 4.0 / (1.0 + x * x);
     }

     pi = step * sum;
     printf("x: %f --  pi: %f---- Thread %d !!!\n", x, pi, omp_get_thread_num());
     std::cout << "Time Cost: " << omp_get_wtime() - begin_time << std::endl;
     return 0;
 }

 reduction(+:sum)它的意思是告诉编译器:下面的for循环你要分成多个线程跑,但每个线程都要保存变量sum的拷贝,循环结束后,所有线程把自己的sum累加起来作为最后的输出。

 demo6

 #include <stdio.h>
 #include <omp.h>
 #include <time.h>
 #define MAX 100

 // Demo 6 (atomic): every thread of the team increments a shared counter
 // atomically; the final value is printed after the parallel region.
 int main() {

     int count = 0;
     #pragma omp parallel num_threads(MAX)
     {
         int seen;  // counter value produced by this thread's own increment

         // "atomic" only protects the single update that follows it. Reading
         // count again in printf, as the code did before, was an unsynchronized
         // read racing with other threads' increments. "atomic capture" does the
         // increment and copies the new value in one indivisible step.
         #pragma omp atomic capture
         seen = ++count;

         printf(" Thread : %d --- Count : %d !!\n", omp_get_thread_num(), seen);
     }
     printf("Number of threads: %d\n", count);
 }

 说的有100个线程,其实还是那四个线程在跑

 #include <iostream>
 #include <omp.h> // header required for OpenMP programs

 // Demo 6 (critical): accumulate into a shared variable from a parallel loop,
 // guarding the two-statement update with a named critical section.
 int main()
 {
     int sum = 0;
     std::cout << "Before: " << sum << std::endl;

     #pragma omp parallel for
     for (int n = 0; n < 100; ++n)
     {
         // Only one thread at a time may execute the critical section named
         // "sum", so the two updates below are never interleaved with another
         // thread's updates.
         #pragma omp critical (sum)
         {
             sum += n;
             sum += n * 2;
         }
     }

     std::cout << "After: " << sum << std::endl;
     return 0;
 }

 demo7

 #include <iostream>
 #include <omp.h> // header required for OpenMP programs

 // Demo 7: two work-sharing loops in one parallel region; the first one is
 // marked "nowait".
 int main()
 {
     #pragma omp parallel
     {
         // "nowait": a thread that finishes its share of this loop moves on to
         // the next loop without waiting for the rest of the team.
         #pragma omp for nowait
         for (int plus = 0; plus < 100; ++plus)
         {
             std::cout << "++";
         }

         #pragma omp for
         for (int minus = 0; minus < 10; ++minus)
         {
             std::cout << "--";
         }
     }

     return 0;
 }

 并行for循环后面自带隐式barrier,它像一堵墙一样,不执行完i<100那个循环是无法执行下一个循环的,但是设置nowait后,不必等到第一个循环结束,第二个就可以开始了

demo8

 #include <iostream>
 #include <omp.h> // header required for OpenMP programs

 // Demo 8 (static): print which thread runs each iteration under a static
 // schedule with chunk size 2.
 int main()
 {
     // schedule(static, 2): every two consecutive iterations form one chunk, so
     // the 10 iterations become 5 chunks. When the team has two threads,
     // iterations 0-1, 4-5 and 8-9 go to the first thread and the remaining
     // ones to the second. This code does not fix the thread count itself.
     #pragma omp parallel for schedule(static, 2)
     for (int iter = 0; iter < 10; ++iter)
     {
         std::cout << "Thread ID:" << omp_get_thread_num() << " Value:" << iter << std::endl;
     }

     return 0;
 }

 静态调度,哪个线程执行哪个任务已经分配好了

 #include <iostream>
 #include <omp.h> // header required for OpenMP programs

 // Demo 8 (dynamic): print which thread runs each iteration under a dynamic
 // schedule with chunk size 2.
 int main()
 {
     // schedule(dynamic, 2): every two consecutive iterations form one chunk, so
     // the 20 iterations become 10 chunks. Whenever a thread is idle and chunks
     // remain, that thread takes the next chunk.
     #pragma omp parallel for schedule(dynamic, 2)
     for (int iter = 0; iter < 20; ++iter)
     {
         std::cout << "Thread ID:" << omp_get_thread_num() << " Value:" << iter << std::endl;
     }

     return 0;
 }

 每两次循环迭代作为一个任务,谁空闲谁执行。

 

posted @ 2021-11-02 17:33  李点  阅读(101)  评论(0)    收藏  举报