首先放上代码:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "cuda.h"
#include <cstdlib>
#include <iostream>

using std::cout;
using std::endl;

#define arrayNum 100
#define mwidth 10
#define mheight 10
#define mlength 10

// Abort with a readable message when a CUDA runtime call fails; an ignored
// error would otherwise only surface later as an unrelated-looking failure.
#ifndef CUDA_CHECK
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t cudaCheckStatus_ = (call);                             \
        if (cudaCheckStatus_ != cudaSuccess) {                             \
            std::cerr << "CUDA error " << __FILE__ << ":" << __LINE__      \
                      << ": " << cudaGetErrorString(cudaCheckStatus_)      \
                      << std::endl;                                        \
            std::exit(EXIT_FAILURE);                                       \
        }                                                                  \
    } while (0)
#endif

// NOTE: texture references (the file-scope `texture<>` object below) are the
// legacy texture API; they are deprecated and not available in CUDA 12+.
texture<short, 3, cudaReadModeNormalizedFloat> texVolumeData; // 3D texture
cudaArray *d_volumeArray = 0;

// Builds a mwidth x mheight x mlength volume of shorts holding 0, 1, 2, ...,
// uploads it into the CUDA array d_volumeArray and binds that array to the
// 3D texture reference texVolumeData.
//
// Takes no parameters and returns nothing; on any allocation or CUDA failure
// it prints a message and terminates the process.
void textureVolumeDataInit(void)
{
    // Element count must use all three dimensions (width * height * depth).
    const size_t elementCount = (size_t)mwidth * mheight * mlength;

    short *texturedata = (short *)malloc(elementCount * sizeof(short));
    if (texturedata == 0) {
        std::cerr << "textureVolumeDataInit: host allocation failed" << std::endl;
        std::exit(EXIT_FAILURE);
    }
    for (size_t i = 0; i < elementCount; i++) {
        texturedata[i] = (short)i;
    }

    // --- Create 3D array
    const cudaExtent volumeSize = make_cudaExtent(mwidth, mheight, mlength);
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<short>();
    CUDA_CHECK(cudaMalloc3DArray(&d_volumeArray, &channelDesc, volumeSize));

    // --- Copy data to 3D array (host to device)
    cudaMemcpy3DParms copyParams = { 0 };
    // Host rows are tightly packed, so the pitch is width * sizeof(short).
    copyParams.srcPtr = make_cudaPitchedPtr((void *)texturedata,
                                            volumeSize.width * sizeof(short),
                                            volumeSize.width,
                                            volumeSize.height);
    copyParams.dstArray = d_volumeArray;
    copyParams.extent = volumeSize;
    copyParams.kind = cudaMemcpyHostToDevice;
    CUDA_CHECK(cudaMemcpy3D(&copyParams));

    // --- Set texture parameters
    texVolumeData.normalized = false;                // unnormalized coordinates: [0, dim) in texels
    texVolumeData.filterMode = cudaFilterModeLinear; // linear interpolation
    // Wrap/mirror addressing is only supported with normalized coordinates,
    // so clamp is the mode that applies to unnormalized lookups.
    texVolumeData.addressMode[0] = cudaAddressModeClamp;
    texVolumeData.addressMode[1] = cudaAddressModeClamp;
    texVolumeData.addressMode[2] = cudaAddressModeClamp;

    // --- Bind array to 3D texture
    CUDA_CHECK(cudaBindTextureToArray(texVolumeData, d_volumeArray, channelDesc));

    // The blocking cudaMemcpy3D above has finished with the host staging buffer.
    free(texturedata);
}
// Samples the z = 0 slice of texVolumeData at texel centres and writes the
// rescaled values to c.
//
// Launch layout: 2D grid of 2D blocks; thread (x, y) writes
// c[x + y * (blockDim.x * gridDim.x)]. Threads outside the mwidth x mheight
// slice, or whose index would fall past arrayNum, do nothing.
//
// c: device buffer with room for at least arrayNum floats.
__global__ void cudaTest(float *c)
{
    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;
    const int idx = x + y * blockDim.x * gridDim.x;
    if (x >= mwidth || y >= mheight || idx >= arrayNum) {
        return;
    }

    // The texture is read in cudaReadModeNormalizedFloat, so the fetched value
    // is a normalized float that has to be scaled back to the short range.
    // NOTE(review): for signed 16-bit data the exact factor is likely 32767
    // rather than 65535/2 - confirm against the CUDA texture documentation.
    const float scale = 65535.0f / 2.0f;

    // +0.5f addresses the centre of a texel, so linear filtering returns the
    // stored texel value instead of a blend of neighbours.
    c[idx] = scale * tex3D(texVolumeData, x + 0.5f, y + 0.5f, 0.5f);
}

// Prints a message and returns true when status is not cudaSuccess.
static bool reportCudaFailure(cudaError_t status, const char *what)
{
    if (status == cudaSuccess) {
        return false;
    }
    cout << what << " failed: " << cudaGetErrorString(status) << endl;
    return true;
}

// Uploads the test volume, runs cudaTest over a 10 x 10 thread grid and prints
// the arrayNum results, one per line. Returns 0 on success, 1 on failure.
int main()
{
    float *dev_c = 0;
    float *c = (float *)malloc(arrayNum * sizeof(float));
    if (c == 0) {
        cout << "host allocation failed" << endl;
        return 1;
    }
    if (reportCudaFailure(cudaMalloc((void **)&dev_c, arrayNum * sizeof(float)), "cudaMalloc")) {
        free(c);
        return 1;
    }

    textureVolumeDataInit();

    // 2 x 2 blocks of 5 x 5 threads cover the 10 x 10 slice exactly.
    dim3 blocks(2, 2);
    dim3 threads(5, 5);
    cudaTest<<<blocks, threads>>>(dev_c);

    // Launch-configuration errors are only visible through cudaGetLastError();
    // the blocking cudaMemcpy below reports errors raised while the kernel ran.
    bool failed = reportCudaFailure(cudaGetLastError(), "cudaTest launch");
    if (!failed) {
        failed = reportCudaFailure(
            cudaMemcpy(c, dev_c, arrayNum * sizeof(float), cudaMemcpyDeviceToHost),
            "cudaMemcpy");
    }
    if (!failed) {
        for (int i = 0; i < arrayNum; i++) {
            cout << c[i] << endl;
        }
    }

    // Release the texture binding, the CUDA array and both result buffers.
    cudaUnbindTexture(texVolumeData);
    cudaFreeArray(d_volumeArray);
    d_volumeArray = 0;
    cudaFree(dev_c);
    free(c);

    system("pause");
    return failed ? 1 : 0;
}
注意几点:
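由于纹理以 cudaReadModeNormalizedFloat 模式读取,tex3D 返回的不是原始整数,而是规范化后的浮点值(short 类型被映射到 [-1, 1]),所以需要乘一个比例系数 65535/2.0 才能还原出原来的数值。如果是 unsigned short 类型的数据(被映射到 [0, 1]),比例系数就是 65535 了。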
65535.0/2.0*tex3D(texVolumeData, (float)(x+0.5), (float)(y+0.5),0+0.5);