命令行cpp与cu文件混合编译
首先这里有两段代码:
main.cpp:
#include <stdio.h>
#include <iostream>
// func() is implemented in test.cu; declaring it with C linkage here matches
// the extern "C" on its definition, so the two object files link by the plain
// symbol name.
extern "C" int func();
// Host-only entry point: prints a greeting, then hands control to func(),
// which is declared with C linkage earlier in this file.
int main()
{
    const char *greeting = "Hello C++";
    std::cout << greeting << std::endl;

    func();  // result is not inspected
    return 0;
}
test.cu:
#include <cuda_runtime.h>
#include <stdio.h>
// Element-wise subtraction kernel: c[i] = b[i] - a[i].
//
// Launch layout: 1D grid of 1D blocks, one thread per element. Any grid that
// covers at least n threads is valid because threads with a global index >= n
// do nothing. No shared memory is used.
//
// n is a trailing parameter with a default so that launches written against
// the old three-argument form still compile; with the default the guard is
// effectively disabled and the caller must launch exactly one thread per
// element, as before.
__global__ void testThread1(int *c, const int *a, const int *b, unsigned int n = ~0u)
{
    // Global index; reduces to threadIdx.x for a single-block launch.
    unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        c[i] = b[i] - a[i];
}

// Reports a failed CUDA runtime call on stderr.
// Returns true when status is cudaSuccess, false otherwise.
static bool cudaCallOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Computes c[i] = b[i] - a[i] for i in [0, size) on device 0.
//
// c, a and b are host arrays of at least `size` ints. The function allocates
// three device buffers, copies a and b to the device, runs testThread1, copies
// the result back into c and frees the buffers.
//
// The function returns void, so failures are reported on stderr; after a
// failure the remaining steps are skipped, c may be left unmodified, and the
// device buffers are still released.
void addWithCuda(int *c, const int *a, const int *b, unsigned int size)
{
    // Nothing to compute; a launch with zero threads would be rejected anyway.
    if (size == 0)
        return;

    const size_t bytes = static_cast<size_t>(size) * sizeof(int);

    // A single block cannot hold an arbitrary number of threads, so split the
    // work over as many fixed-size blocks as needed (ceil division written so
    // it cannot overflow for large size).
    const unsigned int threadsPerBlock = 256;
    const unsigned int blocks = size / threadsPerBlock + (size % threadsPerBlock != 0 ? 1u : 0u);

    int *dev_a = 0;
    int *dev_b = 0;
    int *dev_c = 0;

    bool ok = cudaCallOk(cudaSetDevice(0), "cudaSetDevice");
    ok = ok && cudaCallOk(cudaMalloc((void**)&dev_c, bytes), "cudaMalloc(dev_c)");
    ok = ok && cudaCallOk(cudaMalloc((void**)&dev_a, bytes), "cudaMalloc(dev_a)");
    ok = ok && cudaCallOk(cudaMalloc((void**)&dev_b, bytes), "cudaMalloc(dev_b)");
    ok = ok && cudaCallOk(cudaMemcpy(dev_a, a, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(a)");
    ok = ok && cudaCallOk(cudaMemcpy(dev_b, b, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(b)");

    if (ok)
    {
        testThread1<<<blocks, threadsPerBlock>>>(dev_c, dev_a, dev_b, size);
        // A launch does not return a status; a bad configuration shows up here.
        ok = cudaCallOk(cudaGetLastError(), "testThread1 launch");
    }

    // The blocking copy also waits for the kernel, so an error raised while
    // the kernel was running is reported by this call.
    ok = ok && cudaCallOk(cudaMemcpy(c, dev_c, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(c)");

    // Pointers that were never allocated are still null, which cudaFree accepts.
    cudaFree(dev_a);
    cudaFree(dev_b);
    cudaFree(dev_c);
}
// Self-test exported with C linkage.
//
// Fills two 1000-element arrays with rand() % 100 values, computes b - a on
// the host (c) and through addWithCuda (cc), writes both results side by side
// to out.txt, and prints "pass" when every element matches, "no pass"
// otherwise. Calls cudaDeviceReset() before freeing the host arrays.
//
// Always returns 0.
extern "C"
int func()
{
    const int n = 1000;
    int *a = new int[n];
    int *b = new int[n];
    int *c = new int[n];
    // Value-initialised so the comparison below reads defined values even if
    // addWithCuda never writes the result back.
    int *cc = new int[n]();

    for (int i = 0; i < n; i++)
    {
        a[i] = rand() % 100;
        b[i] = rand() % 100;
        c[i] = b[i] - a[i];  // host reference result
    }

    addWithCuda(cc, a, b, n);

    // The dump is a debugging aid: if the file cannot be created, report it
    // and carry on with the comparison instead of writing through a null FILE*.
    FILE *fp = fopen("out.txt", "w");
    if (fp != NULL)
    {
        for (int i = 0; i < n; i++)
            fprintf(fp, "%d %d\n", c[i], cc[i]);
        fclose(fp);
    }
    else
    {
        perror("out.txt");
    }

    bool flag = true;
    for (int i = 0; i < n; i++)
    {
        if (c[i] != cc[i])
        {
            flag = false;
            break;
        }
    }

    if (flag == false)
        printf("no pass");
    else
        printf("pass");

    cudaDeviceReset();

    delete[] a;
    delete[] b;
    delete[] c;
    delete[] cc;
    return 0;
}
Linux下可以这样:
nvcc -c test.cu
g++ -c main.cpp
g++ -o main main.o test.o -L/usr/local/cuda/lib64 -lcudart
Windows下可以这样:
nvcc -c test.cu
cl -c main.cpp
link main.obj test.obj cudart.lib -libpath:"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\lib\x64"
应该都差不多。


浙公网安备 33010602011771号