message

float* output1; output1 = (float* )malloc(18*4*sizeof(float)); int* output2; output2 = (int* )malloc(18 * 4 * sizeof(int)); CHECK(cudaMemcpyAsync(buffers[0], InputDatas[0].data(), mInputParams.BatchSize * mInputParams.ImgC * mInputParams.ImgH * mInputParams.ImgW * sizeof(float), cudaMemcpyHostToDevice, stream)); mContext->enqueue(mInputParams.BatchSize, buffers,stream, nullptr); CHECK(cudaMemcpyAsync(output1, buffers[1], 1 * 18 * 4*sizeof(float), cudaMemcpyDeviceToHost, stream)); CHECK(cudaMemcpyAsync(output2, buffers[2], 1 * 18 * 4*sizeof(int), cudaMemcpyDeviceToHost, stream));

浙公网安备 33010602011771号