cuda编程(1)
cuda:
#include <stdio.h>
#include <stdlib.h>
//#include <conio.h>
/*
 * Records, for every launched thread, where that thread sits in the launch.
 *
 * Launch layout: a one-dimensional grid of one-dimensional blocks (only the .x
 * components of blockIdx, blockDim and threadIdx are read). No shared memory
 * is used.
 *
 * Each thread writes exactly one element of every output array, at its flat
 * global index blockIdx.x * blockDim.x + threadIdx.x:
 *   block[]       - blockIdx.x
 *   thread[]      - threadIdx.x
 *   warp[]        - threadIdx.x / warpSize
 *   calc_thread[] - the flat global index itself
 *
 * Precondition: the kernel receives no element count and performs no bounds
 * check, so every array must hold at least gridDim.x * blockDim.x elements.
 *
 * The device-side printf is a debugging aid; it prints the flat global index.
 */
__global__ void what_is_my_id (unsigned int * const block, unsigned int* const thread, unsigned int* const warp,
                               unsigned int* const calc_thread)
{
    const unsigned int thread_idx = (blockIdx.x * blockDim.x) + threadIdx.x;

    // thread_idx is unsigned, so it is printed with %u rather than %d.
    printf("%u \n", thread_idx);

    block[thread_idx] = blockIdx.x;
    thread[thread_idx] = threadIdx.x;
    warp[thread_idx] = threadIdx.x / warpSize;
    calc_thread[thread_idx] = thread_idx;
}
// Number of unsigned int elements in each of the host result arrays below.
#define ARRAY_SIZE 128
// Size in bytes of one such array; main uses it as the cudaMalloc / cudaMemcpy size.
#define ARRAY_SIZE_IN_BYTES (sizeof(unsigned int) * (ARRAY_SIZE))
// Host-side destinations for the four device arrays that main copies back.
unsigned int cpu_block[ARRAY_SIZE];
unsigned int cpu_thread[ARRAY_SIZE];
unsigned int cpu_warp[ARRAY_SIZE];
unsigned int cpu_calc_thread[ARRAY_SIZE];
/*
 * Exits with a diagnostic on stderr when a CUDA runtime call did not succeed.
 *
 * status - return value of the CUDA runtime call being checked
 * step   - short description of the call, included in the diagnostic
 */
static void check_cuda(const cudaError_t status, const char * const step)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA failure during %s: %s\n", step, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Launches what_is_my_id on num_blocks x num_threads threads, copies the four
 * result arrays back to the host and prints one line per array element.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if the launch configuration
 * does not match ARRAY_SIZE or if any CUDA runtime call fails.
 */
int main(void)
{
    const unsigned int num_blocks = 2;
    const unsigned int num_threads = 64;

    unsigned int * gpu_block = NULL;
    unsigned int * gpu_thread = NULL;
    unsigned int * gpu_warp = NULL;
    unsigned int * gpu_calc_thread = NULL;

    // The kernel has no bounds check and the print loop reads ARRAY_SIZE
    // elements, so the launch must cover the arrays exactly.
    if (num_blocks * num_threads != (unsigned int)ARRAY_SIZE)
    {
        fprintf(stderr, "launch of %u x %u threads does not match ARRAY_SIZE %u\n",
                num_blocks, num_threads, (unsigned int)ARRAY_SIZE);
        return EXIT_FAILURE;
    }

    check_cuda(cudaMalloc((void **)&gpu_block, ARRAY_SIZE_IN_BYTES), "cudaMalloc(gpu_block)");
    check_cuda(cudaMalloc((void **)&gpu_thread, ARRAY_SIZE_IN_BYTES), "cudaMalloc(gpu_thread)");
    check_cuda(cudaMalloc((void **)&gpu_warp, ARRAY_SIZE_IN_BYTES), "cudaMalloc(gpu_warp)");
    check_cuda(cudaMalloc((void **)&gpu_calc_thread, ARRAY_SIZE_IN_BYTES), "cudaMalloc(gpu_calc_thread)");

    what_is_my_id<<<num_blocks, num_threads>>> (gpu_block, gpu_thread, gpu_warp, gpu_calc_thread);
    // A kernel launch returns nothing: configuration errors are reported by
    // cudaGetLastError, execution errors by the next synchronizing call.
    check_cuda(cudaGetLastError(), "what_is_my_id launch");
    check_cuda(cudaDeviceSynchronize(), "what_is_my_id execution");

    check_cuda(cudaMemcpy(cpu_block, gpu_block, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost),
               "cudaMemcpy(cpu_block)");
    check_cuda(cudaMemcpy(cpu_thread, gpu_thread, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost),
               "cudaMemcpy(cpu_thread)");
    check_cuda(cudaMemcpy(cpu_warp, gpu_warp, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost),
               "cudaMemcpy(cpu_warp)");
    check_cuda(cudaMemcpy(cpu_calc_thread, gpu_calc_thread, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost),
               "cudaMemcpy(cpu_calc_thread)");

    check_cuda(cudaFree(gpu_block), "cudaFree(gpu_block)");
    check_cuda(cudaFree(gpu_thread), "cudaFree(gpu_thread)");
    check_cuda(cudaFree(gpu_warp), "cudaFree(gpu_warp)");
    check_cuda(cudaFree(gpu_calc_thread), "cudaFree(gpu_calc_thread)");

    // The arrays hold unsigned int values, so they are printed with %u.
    for (int i = 0; i < ARRAY_SIZE; i++)
    {
        printf("calculated thread: %u - block: %u - warp %u - Thread %u \n",
               cpu_calc_thread[i], cpu_block[i], cpu_warp[i], cpu_thread[i]);
    }

    return 0;
}
CMakeLists.txt:
# Build the CUDA source test.cu into an executable named "test".
# Each CMake command invocation must sit on its own line.
add_definitions(-std=c++11)
find_package(CUDA REQUIRED)
include_directories(${CUDA_INCLUDE_DIRS})
cuda_add_executable(test test.cu)
target_link_libraries(test ${CUDA_LIBRARIES})
浙公网安备 33010602011771号