OpenCL入门

初入OpenCL,做个记录。

在Windows下开发OpenCL程序,必须先下载并安装OpenCL的SDK。现在AMD、NVIDIA、Intel均提供各自的OpenCL库,基本是大同小异。安装好SDK后新建Win32控制台项目,然后需要配置包含文件路径和库路径,具体见下图(我安装的是Intel的SDK)。

1.其中那个包含Intel的路径就是包含cl.h文件的目录。

2.库路径就是图中那个Intel的lib目录。

3.添加需要链接的库OpenCL.lib。

配置完成后就可以开始写代码调试了,OpenCL的初始化还是很复杂的,和CUDA几行代码搞定完全没可比性,刚开始可能对流程不太熟悉,慢慢熟悉就好,当然也可以自己写个框架来做这些复杂的初始化工作。OpenCL的内核代码是即时编译的,代码中我为了方便没有从cl文件中读入Kernel代码,直接以字符串的形式定义了。

  1 #include "stdafx.h"
  2 
  3 #include <iostream>
  4 #include <fstream>
  5 #include <string.h>
  6 #include <vector>
  7 using namespace std;
  8 
  9 #if defined(__APPLE__) || defined(__MACOSX)
 10 #include <OpenCL/cl.hpp>
 11 #else
 12 #include <CL/cl.h>
 13 #endif
 14 
 15 #define KERNEL(...) #__VA_ARGS__
 16 
 17 #define ARRAY_X_LEN 16
 18 #define ARRAY_Y_LEN 16
 19 
 20 const char *kernelSourceCode = KERNEL(
 21     __kernel void VecAdd(__global int *buffer1, __global int *buffer2, __global int *buffer3)
 22 {
 23         size_t idx = get_global_id(0);
 24         size_t idy = get_global_id(1);
 25         int dimX = get_global_size(0);
 26         int dimY = get_global_size(1);
 27         int id = idx + idy*dimX;
 28         buffer3[id] = buffer1[id] + buffer2[id];
 29     });
 30 
 31 int main()
 32 {
 33     cl_int status = 0;
 34     size_t deviceListSize;
 35     cl_uint numPlatforms;
 36     cl_platform_id platform = NULL;
 37     status = clGetPlatformIDs(0, NULL, &numPlatforms);
 38     if (status != CL_SUCCESS)
 39     {
 40         printf("获取平台数目失败");
 41         return EXIT_FAILURE;
 42     }
 43     if (numPlatforms >0)
 44     {
 45         cl_platform_id* platforms = (cl_platform_id*)malloc(numPlatforms*sizeof(cl_platform_id));
 46         status = clGetPlatformIDs(numPlatforms, platforms, NULL);
 47         if (status != CL_SUCCESS)
 48         {
 49             printf("初始化平台失败");
 50             return -1;
 51         }
 52         for (unsigned int i = 0; i<numPlatforms; ++i)
 53         {
 54             char *vendor = (char*)malloc(100);
 55             status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(vendor), vendor, NULL);
 56             platform = platforms[i];
 57             if (!strcmp(vendor, "NVIDIA Corporation"))
 58             {
 59                 break;
 60             }
 61         }
 62         delete platforms;
 63     }
 64     cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 };
 65     cl_context_properties* cprops = (NULL == platform) ? NULL : cps;
 66     cl_context context = clCreateContextFromType(cprops, CL_DEVICE_TYPE_GPU, NULL, NULL, &status);
 67     if (status != CL_SUCCESS)
 68     {
 69         printf("创建上下文失败");
 70         return EXIT_FAILURE;
 71     }
 72     status = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &deviceListSize);
 73     if (status != CL_SUCCESS)
 74     {
 75         printf("获取设备数目失败");
 76         return EXIT_FAILURE;
 77     }
 78     cl_device_id *devices = (cl_device_id *)malloc(deviceListSize);
 79     if (devices == 0)
 80     {
 81         printf("为设备分配空间失败");
 82         return EXIT_FAILURE;
 83     }
 84     status = clGetContextInfo(context, CL_CONTEXT_DEVICES, deviceListSize, devices, NULL);
 85     if (status != CL_SUCCESS)
 86     {
 87         printf("初始化设备失败");
 88         return EXIT_FAILURE;
 89     }
 90 
 91     size_t sourceSize[] = { strlen(kernelSourceCode) };
 92     cl_program program = clCreateProgramWithSource(context, 1, &kernelSourceCode, sourceSize, &status);
 93     if (status != CL_SUCCESS)
 94     {
 95         printf("创建程序失败");
 96         return EXIT_FAILURE;
 97     }
 98     status = clBuildProgram(program, 1, devices, NULL, NULL, NULL);
 99     if (status != CL_SUCCESS)
100     {
101         printf("编译程序失败");
102         return EXIT_FAILURE;
103     }
104     cl_kernel kernel = clCreateKernel(program, "VecAdd", &status);
105     if (status != CL_SUCCESS)
106     {
107         printf("创建内核失败");
108         return EXIT_FAILURE;
109     }
110     cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, &status);
111     if (status != CL_SUCCESS)
112     {
113         printf("创建命令队列失败");
114         return EXIT_FAILURE;
115     }
116     int arrayLenght = ARRAY_X_LEN*ARRAY_Y_LEN;
117     int arraySize = arrayLenght*sizeof(int);
118 
119     int *hA = new int[arrayLenght];
120     int *hB = new int[arrayLenght];
121     int *hC = new int[arrayLenght];
122 
123     memset(hA, 0, arraySize);
124     memset(hB, 0, arraySize);
125     memset(hC, 0, arraySize);
126 
127     for (int i = 0; i<arrayLenght; i++)
128     {
129         hA[i] = i;
130         hB[i] = i;
131     }
132 
133     cl_mem dA = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR, arraySize, NULL, &status);
134     if (status != CL_SUCCESS)
135     {
136         printf("创建内存对象失败");
137         return EXIT_FAILURE;
138     }
139     cl_mem dB = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR, arraySize, NULL, &status);
140     if (status != CL_SUCCESS)
141     {
142         printf("创建内存对象失败");
143         return EXIT_FAILURE;
144     }
145     cl_mem dC = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR, arraySize, NULL, &status);
146     if (status != CL_SUCCESS)
147     {
148         printf("创建内存对象失败");
149         return EXIT_FAILURE;
150     }
151     status = clEnqueueWriteBuffer(commandQueue, dA, CL_TRUE, 0, arraySize, hA, 0, NULL, NULL);
152     if (status != CL_SUCCESS)
153     {
154         printf("输入值写入内存对象失败");
155         return EXIT_FAILURE;
156     }
157     status = clEnqueueWriteBuffer(commandQueue, dB, CL_TRUE, 0, arraySize, hB, 0, NULL, NULL);
158     if (status != CL_SUCCESS)
159     {
160         printf("输入值写入内存对象失败");
161         return EXIT_FAILURE;
162     }
163     status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&dA);
164     if (status != CL_SUCCESS)
165     {
166         printf("设置内核参数失败");
167         return EXIT_FAILURE;
168     }
169     status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&dB);
170     if (status != CL_SUCCESS)
171     {
172         printf("设置内核参数失败");
173         return EXIT_FAILURE;
174     }
175     status = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&dC);
176     if (status != CL_SUCCESS)
177     {
178         printf("设置内核参数失败");
179         return EXIT_FAILURE;
180     }
181     size_t globalThreads[] = { ARRAY_X_LEN, ARRAY_Y_LEN };
182     size_t localThreads[] = { 4, 4 };
183     status = clEnqueueNDRangeKernel(commandQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL);
184     if (status != CL_SUCCESS)
185     {
186         printf("将内核放入命令队列失败");
187         return EXIT_FAILURE;
188     }
189     status = clFinish(commandQueue);
190     if (status != CL_SUCCESS)
191     {
192         printf("队列还没有完成");
193         return EXIT_FAILURE;
194     }
195     status = clEnqueueReadBuffer(commandQueue, dC, CL_TRUE, 0, arraySize, hC, 0, NULL, NULL);
196     if (status != CL_SUCCESS)
197     {
198         printf("读内存对象失败");
199         return EXIT_FAILURE;
200     }
201     printf("结果:\n");
202     for (int i = 0; i<arrayLenght; i++)
203     {
204         printf("%d ", hC[i]);
205         if ((i + 1) % ARRAY_X_LEN == 0)
206             printf("\n");
207     }
208     status = clReleaseKernel(kernel);
209     status = clReleaseProgram(program);
210     status = clReleaseMemObject(dA);
211     status = clReleaseMemObject(dB);
212     status = clReleaseMemObject(dC);
213     status = clReleaseCommandQueue(commandQueue);
214     status = clReleaseContext(context);
215     free(devices);
216     delete [] hA;
217     delete [] hB;
218     delete [] hC;
219     return 0;
220 }

运行结果:

posted @ 2014-04-29 14:33  飞越彩虹  阅读(2660)  评论(0编辑  收藏  举报