cuda 中的内存拷贝

明白几个概念

  • cudaPitchedPtr(void *ptr, size_t pitch, size_t xsize, size_t ysize)
    cudaPitchedPtr 用于描述一块有行跨度的内存空间。
  • pitch
    pitch 在内存管理中,它指物理内存中实际分配的行跨度,表示从一行数据跳到下一行需要跨越的字节数, 它包含填充部分的数据。
  • xsize
    xsize 表示有效元素个数, 它指每一行有效元素个数,也可以理解为列数
  • ysize
    ysize 表示行数

占用的总字节数: pitch * ysize (单位:字节)

  • cudaPos(size_t x, size_t y, size_t z)
    cudaPos 表示拷贝的起始偏移位置: 对线性内存(主机/设备指针), x 以字节为单位, y 表示行偏移, z 表示切片偏移; 对 CUDA 数组, x、y、z 均以元素为单位。

  • cudaExtent(size_t w, size_t h, size_t d)
    cudaExtent 表示内存在三个维度上的延展范围。当引用的是数组内存时, w 表示元素个数; 当引用的是线性内存时, w 表示字节数。w 不包含填充部分。
    h 和 d 均表示元素个数(即行数和切片数)。

所以,当描述一块线性内存的时候,需要使用 cudaPitchedPtr,cudaPos, cudaExtent 三个参数一起描述。 可以理解为: cudaPitchedPtr描述内存空间, cudaExtent 描述内存的延展范围, cudaPos 表示内存的起始偏移位置。

简单理解: cudaPos相当于mysql的offset, cudaExtent相当于mysql的limit

#include <cuda_runtime.h>
#include <iostream>

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char *what) {
    if (status == cudaSuccess) return true;
    std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
    return false;
}

// Demonstrates cudaMemcpy3D on plain linear memory treated as a single row:
// copies `copySize` floats from the host buffer, starting at element 2, into
// the device buffer, starting at element 3, then reads the whole device
// buffer back and prints it.
// Returns 0 on success and 1 if any CUDA runtime call fails.
int main() {
    const int inputSize = 100;
    const int copySize = 90;

    float *h_data = new float[inputSize];
    for (int i = 0; i < inputSize; ++i) h_data[i] = i;

    float *d_data = nullptr;
    bool ok = cudaOk(cudaMalloc(&d_data, inputSize * sizeof(float)), "cudaMalloc");
    // cudaMalloc does not clear the allocation; zero it so the elements
    // outside the copied window are read back as 0 instead of stale data.
    ok = ok && cudaOk(cudaMemset(d_data, 0, inputSize * sizeof(float)), "cudaMemset");

    if (ok) {
        cudaMemcpy3DParms p = {0};
        // For host/device pointers the x offset is expressed in bytes
        // (acts like an SQL OFFSET).
        p.srcPos = make_cudaPos(2 * sizeof(float), 0, 0);
        p.srcPtr = make_cudaPitchedPtr(h_data, inputSize * sizeof(float), inputSize, 1);
        p.dstPtr = make_cudaPitchedPtr(d_data, inputSize * sizeof(float), inputSize, 1);
        p.dstPos = make_cudaPos(3 * sizeof(float), 0, 0);
        // Width is in bytes for linear memory; height and depth are 1
        // (acts like an SQL LIMIT).
        p.extent = make_cudaExtent(copySize * sizeof(float), 1, 1);
        p.kind = cudaMemcpyHostToDevice;

        ok = cudaOk(cudaMemcpy3D(&p), "cudaMemcpy3D");
    }

    float result[inputSize];
    ok = ok && cudaOk(cudaMemcpy(result, d_data, inputSize * sizeof(float),
                                 cudaMemcpyDeviceToHost),
                      "cudaMemcpy");

    // Only print when every step succeeded; otherwise `result` is not valid.
    if (ok) {
        for (int i = 0; i < inputSize; ++i)
            std::cout << "result[" << i << "] = " << result[i] << std::endl;
    }

    delete[] h_data;
    cudaFree(d_data);  // d_data is still null if cudaMalloc failed
    return ok ? 0 : 1;
}

输出:

result[0] = 0
result[1] = 0
result[2] = 0
result[3] = 2
result[4] = 3
result[5] = 4
result[6] = 5
result[7] = 6
result[8] = 7
result[9] = 8
result[10] = 9
result[11] = 10
result[12] = 11
result[13] = 12
result[14] = 13
result[15] = 14
result[16] = 15
result[17] = 16
result[18] = 17
result[19] = 18
result[20] = 19
result[21] = 20
result[22] = 21
result[23] = 22
result[24] = 23
result[25] = 24
result[26] = 25
result[27] = 26
result[28] = 27
result[29] = 28
result[30] = 29
result[31] = 30
result[32] = 31
result[33] = 32
result[34] = 33
result[35] = 34
result[36] = 35
result[37] = 36
result[38] = 37
result[39] = 38
result[40] = 39
result[41] = 40
result[42] = 41
result[43] = 42
result[44] = 43
result[45] = 44
result[46] = 45
result[47] = 46
result[48] = 47
result[49] = 48
result[50] = 49
result[51] = 50
result[52] = 51
result[53] = 52
result[54] = 53
result[55] = 54
result[56] = 55
result[57] = 56
result[58] = 57
result[59] = 58
result[60] = 59
result[61] = 60
result[62] = 61
result[63] = 62
result[64] = 63
result[65] = 64
result[66] = 65
result[67] = 66
result[68] = 67
result[69] = 68
result[70] = 69
result[71] = 70
result[72] = 71
result[73] = 72
result[74] = 73
result[75] = 74
result[76] = 75
result[77] = 76
result[78] = 77
result[79] = 78
result[80] = 79
result[81] = 80
result[82] = 81
result[83] = 82
result[84] = 83
result[85] = 84
result[86] = 85
result[87] = 86
result[88] = 87
result[89] = 88
result[90] = 89
result[91] = 90
result[92] = 91
result[93] = 0
result[94] = 0
result[95] = 0
result[96] = 0
result[97] = 0
result[98] = 0
result[99] = 0



posted @ 2025-04-29 15:02  xiezhengcai  阅读(48)  评论(0)    收藏  举报