GPU CUDA 设备图启动（Device Graph Launch）

上图可通过以下示例代码生成：

// Kernel that launches an executable graph from device code in
// fire-and-forget mode.
//
// graph: executable graph handle forwarded to the device-side
//        cudaGraphLaunch call. The setup code in this snippet instantiates it
//        with cudaGraphInstantiateFlagDeviceLaunch before passing it in.
//
// The status returned by cudaGraphLaunch is not inspected. Every thread of the
// launch executes the call; the accompanying host code launches this kernel
// with a <<<1, 1>>> configuration.
__global__ void launchFireAndForgetGraph(cudaGraphExec_t graph) {
    const cudaStream_t launchMode = cudaStreamGraphFireAndForget;
    cudaGraphLaunch(graph, launchMode);
}
// Builds two executable graphs and starts them:
//   1. a device graph, instantiated with cudaGraphInstantiateFlagDeviceLaunch
//      and uploaded on `stream`;
//   2. a launching graph, captured from `stream`, whose only captured work is
//      the launchFireAndForgetGraph kernel that receives the device graph.
// The launching graph is then launched on `stream`.
//
// NOTE(review): `stream` and `create_graph` are not defined in this snippet;
// they are assumed to be provided by the surrounding code. No CUDA return
// status is checked and none of the graph objects are destroyed here.
void graphSetup() {
    // Device graph: create, instantiate for device launch, then upload.
    cudaGraph_t deviceGraph;
    create_graph(&deviceGraph);

    cudaGraphExec_t deviceExec;
    cudaGraphInstantiate(&deviceExec, deviceGraph, cudaGraphInstantiateFlagDeviceLaunch);
    cudaGraphUpload(deviceExec, stream);

    // Launching graph: record the launcher kernel through stream capture.
    cudaGraph_t launcherGraph;
    cudaStreamBeginCapture(stream, cudaStreamCaptureModeGlobal);
    launchFireAndForgetGraph<<<1, 1, 0, stream>>>(deviceExec);
    cudaStreamEndCapture(stream, &launcherGraph);

    cudaGraphExec_t launcherExec;
    cudaGraphInstantiate(&launcherExec, launcherGraph);

    // Launching from the host runs the kernel, which launches the device graph.
    cudaGraphLaunch(launcherExec, stream);
}

上述执行流程可通过以下代码生成：

// Kernel that enqueues an executable graph as a tail launch from device code.
//
// graph: executable graph handle forwarded to the device-side
//        cudaGraphLaunch call. The setup code in this snippet instantiates it
//        with cudaGraphInstantiateFlagDeviceLaunch before passing it in.
//
// The status returned by cudaGraphLaunch is not inspected. Every thread of the
// launch executes the call; the accompanying host code launches this kernel
// with a <<<1, 1>>> configuration.
__global__ void launchTailGraph(cudaGraphExec_t graph) {
    const cudaStream_t launchMode = cudaStreamGraphTailLaunch;
    cudaGraphLaunch(graph, launchMode);
}
// Builds two executable graphs and starts them:
//   1. a device graph, instantiated with cudaGraphInstantiateFlagDeviceLaunch
//      and uploaded on `stream`;
//   2. a launching graph, captured from `stream`, whose only captured work is
//      the launchTailGraph kernel that receives the device graph.
// The launching graph is then launched on `stream`.
//
// NOTE(review): `stream` and `create_graph` are not defined in this snippet;
// they are assumed to be provided by the surrounding code. No CUDA return
// status is checked and none of the graph objects are destroyed here.
void graphSetup() {
    // Device graph: create, instantiate for device launch, then upload.
    cudaGraph_t deviceGraph;
    create_graph(&deviceGraph);

    cudaGraphExec_t deviceExec;
    cudaGraphInstantiate(&deviceExec, deviceGraph, cudaGraphInstantiateFlagDeviceLaunch);
    cudaGraphUpload(deviceExec, stream);

    // Launching graph: record the launcher kernel through stream capture.
    cudaGraph_t launcherGraph;
    cudaStreamBeginCapture(stream, cudaStreamCaptureModeGlobal);
    launchTailGraph<<<1, 1, 0, stream>>>(deviceExec);
    cudaStreamEndCapture(stream, &launcherGraph);

    cudaGraphExec_t launcherExec;
    cudaGraphInstantiate(&launcherExec, launcherGraph);

    // Launching from the host runs the kernel, which tail-launches the device graph.
    cudaGraphLaunch(launcherExec, stream);
}

由尾部图排入队列的尾部启动任务，将先于尾部启动列表中此前各图排入队列的尾部启动任务执行。这些新加入的尾部启动任务按其入队顺序依次执行。

单个图最多可保留 255 个待执行的尾部启动任务。
3.2.8.6.6.2.1.3.1 尾部自启动
设备图可以将自身排入尾部启动队列，但同一时刻每个图只能有一个待执行的自启动任务。为了查询当前正在运行的设备图、以便将其重新启动，CUDA 新增了如下设备端函数：

// Device-side query for the handle of the currently running device graph.
// Returns that handle when the calling kernel runs as a node of a device
// graph, and NULL when the executing kernel is not part of any device graph.
cudaGraphExec_t cudaGetCurrentGraphExec();

该函数返回当前运行的设备图句柄（若内核执行环境位于设备图内）。若当前执行的内核不隶属于任何设备图节点，则返回 NULL。下面的示例代码（位于下方译注之后）演示了如何利用该函数实现重新启动循环（relaunch loop）：

（注：术语处理说明

"pending tail launches" 译为"待执行的尾部启动任务"，通过"待执行"明确任务状态
"self-launch"译为"自启动"，保持与计算机体系结构术语的一致性
保留"device graph"标准译法"设备图"，与CUDA文档体系一致
函数原型维持代码原文格式，通过注释说明增强技术文档的可移植性）

// Device-global counter, incremented once per block by thread 0 each time
// relaunchSelf runs.
__device__ int relaunchCount = 0;

// Kernel that re-enqueues the device graph it is running in as a tail launch
// until relaunchCount reaches the limit of 100.
//
// Only the thread with threadIdx.x == 0 in each block does any work. The
// counter update is a plain (non-atomic) increment.
// NOTE(review): presumably intended for a single-block launch — confirm.
// The status returned by cudaGraphLaunch is not inspected.
__global__ void relaunchSelf() {
    const int relaunchLimit = 100;

    // All threads other than thread 0 have nothing to do.
    if (threadIdx.x != 0) {
        return;
    }

    if (relaunchCount < relaunchLimit) {
        cudaGraphLaunch(cudaGetCurrentGraphExec(), cudaStreamGraphTailLaunch);
    }
    // The counter advances whether or not a relaunch was enqueued.
    relaunchCount++;
}

posted @ 2025-04-13 04:42 吴建明wujianming 阅读(49) 评论(0) 收藏举报

刷新页面返回顶部

吴建明

微信视频号：sph0RgSyDYV47z6 快手号：4874645212 抖音号：dy0so323fq2w 小红书号：95619019828 B站1：UID:3546863642871878 B站2：UID: 3546955410049087

GPU CUDA 设备图启动（Device Graph Launch）

公告