OpenMP - 编译制导(五)task、teams
task制导
显式地将代码块作为任务创建,并将这些任务分配给线程池(当前并行区域的线程组)中的线程执行;多个任务可以并行执行,每个任务都可能在不同的线程上执行。通过 #pragma omp task 指令来创建任务,通过 #pragma omp taskwait 指令来等待当前任务此前生成的子任务完成。
#include <iostream>
#include <omp.h>
using namespace std;
// Prints one line reporting which OpenMP thread is running the given task.
//
// task: caller-chosen identifier that is echoed in the message.
//
// Tasks may execute concurrently on different threads, and a chained `<<`
// expression is a sequence of separate writes to the shared std::cout, so
// without mutual exclusion the fragments of different messages can interleave.
// The named critical section lets only one thread at a time emit its line.
void func(int task){
#pragma omp critical(func_output)
    {
        cout << "thread " << omp_get_thread_num() << " is excuting task " << task << endl;
    }
}
// Demonstrates explicit tasks and taskwait.
//
// A parallel region is opened and a single thread generates the tasks; any
// thread of the team may execute them. Tasks 1 and 2 are generated first,
// taskwait then suspends the generating thread until both have completed,
// the marker line is printed, and only afterwards task 3 is generated.
int main(int argc, char* argv[]){
    #pragma omp parallel
    #pragma omp single
    {
        // First batch: two independent tasks.
        #pragma omp task
        func(1);

        #pragma omp task
        func(2);

        // Wait for the child tasks generated so far (tasks 1 and 2).
        #pragma omp taskwait
        cout << "taskwait..." << endl;

        // Generated only after the wait above has returned.
        #pragma omp task
        func(3);
    }
    return 0;
}
task(保留 taskwait 时的运行结果)
thread 5 is excuting task 2
thread 4 is excuting task 1
taskwait...
thread 0 is excuting task 3
no taskwait(去掉 taskwait 指令后的运行结果)
taskwait...
thread 3 is excuting task 3
thread 0 is excuting task 1
thread 4 is excuting task 2
taskgroup
用于创建一个任务组:任务组是一组任务的集合,遇到 taskgroup 的任务会在该区域结束处等待,直到区域内生成的所有子任务及其后代任务都执行完毕。使用子句 task_reduction 和 in_reduction 进行结果归约。
#include <iostream>
#include <omp.h>
using namespace std;
// Prints one line reporting which OpenMP thread is running the given task.
//
// task: caller-chosen identifier that is echoed in the message.
//
// Tasks may execute concurrently on different threads, and a chained `<<`
// expression is a sequence of separate writes to the shared std::cout, so
// without mutual exclusion the fragments of different messages can interleave.
// The named critical section lets only one thread at a time emit its line.
void func(int task){
#pragma omp critical(func_output)
    {
        cout << "thread " << omp_get_thread_num() << " is excuting task " << task << endl;
    }
}
// Returns first + (first + 1) + ... + last, accumulated with a plain loop.
static int sum_inclusive(int first, int last) {
    int acc = 0;
    for (int value = first; value <= last; ++value) {
        acc += value;
    }
    return acc;
}

// Demonstrates a task reduction inside a taskgroup.
//
// One thread of the parallel region generates two tasks. Each task announces
// itself through func(), sums one half of 1..100 and adds that partial sum to
// `total`, which is declared as a `+` reduction on the taskgroup
// (task_reduction) and joined by each task (in_reduction). The combined value
// is printed after the parallel region has ended.
int main() {
    int total = 0;

    #pragma omp parallel
    #pragma omp single
    {
        #pragma omp taskgroup task_reduction(+:total)
        {
            // Lower half: 1..50.
            #pragma omp task in_reduction(+:total)
            {
                func(1);
                total += sum_inclusive(1, 50);
            }

            // Upper half: 51..100.
            #pragma omp task in_reduction(+:total)
            {
                func(2);
                total += sum_inclusive(51, 100);
            }
        }
    }

    cout << "total = " << total << endl;
    return 0;
}
group
thread 1 is excuting task 1
thread 3 is excuting task 2
total = 5050
#pragma omp taskgroup:定义一个任务组。任务组中的任务可以并行执行,并且可以参与在该任务组上声明的归约操作。
task_reduction(+:total):指定 total 变量在该任务组内使用加法归约。这意味着当任务组中所有任务完成后,total 的值将是其原值加上所有参与任务对 total 进行的加法操作的总和。
#pragma omp task in_reduction(+:total):定义了一个任务,并且这个任务将参与 total 的归约操作。这意味着在这个任务中对 total 的任何加法操作都会参与到最终的归约中。
teams制导
用于创建由多个线程团队(team)组成的联盟(league),每个团队包含一组线程,从而实现多级并行:团队之间并行执行,团队内部的多个线程之间也可以并行执行。需要注意,teams 区域会由每个团队的初始线程各自完整执行一遍,循环迭代必须配合 distribute 指令才会在各团队之间划分。
#include <iostream>
#include <omp.h>
using namespace std;
// Demonstrates two-level parallelism with the teams construct.
//
// A league of up to 4 teams, each limited to 2 threads, fills two arrays: for
// every loop index i, d[i] records the thread number and n[i] the team number
// that executed iteration i. The arrays are printed sequentially afterwards.
//
// The loop uses `distribute parallel for` rather than a bare `parallel for`:
// a teams region is executed in full by the initial thread of every team, so
// without `distribute` each team would run all iterations and the teams would
// race on the same d[i] / n[i] elements. `distribute` splits the iterations
// among the teams, and `parallel for` splits each team's share among its
// threads.
int main(int argc, char* argv[]){
    const int count = 36;
    // Zero-initialised so that every element has a defined value when printed.
    int d[count] = {};
    int n[count] = {};

    #pragma omp teams num_teams(4) thread_limit(2)
    {
        #pragma omp distribute parallel for
        for (int i = 0; i < count; ++i) {
            d[i] = omp_get_thread_num();
            n[i] = omp_get_team_num();
        }
    }

    for (int i = 0; i < count; ++i) {
        // '\n' instead of endl: avoids flushing the stream on every line.
        cout << "thread " << d[i] << " in team " << n[i] << " i = " << i << '\n';
    }
    return 0;
}
。。。