2 Star 19 Fork 16

王振邦/CudaLearning

加入 Gitee
与超过 1400万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
checkThreadIndex.cu 2.33 KB
一键复制 编辑 原始数据 按行查看 历史
王振邦 提交于 2023-05-12 22:59 +08:00 . add professional cuda c programming color pdf
#include "../common/common.h"
#include <cuda_runtime.h>
#include <stdio.h>
/*
* This example helps to visualize the relationship between thread/block IDs and
* offsets into data. For each CUDA thread, this example displays the
* intra-block thread ID, the inter-block block ID, the global coordinate of a
* thread, the calculated offset into input data, and the input data at that
* offset.
*/
void printMatrix(int *C, const int nx, const int ny)
{
int *ic = C;
printf("\nMatrix: (%d.%d)\n", nx, ny);
for (int iy = 0; iy < ny; iy++)
{
for (int ix = 0; ix < nx; ix++)
{
printf("%3d", ic[ix]);
}
ic += nx;
printf("\n");
}
printf("\n");
return;
}
__global__ void printThreadIndex(int *A, const int nx, const int ny)
{
int ix = threadIdx.x + blockIdx.x * blockDim.x;
int iy = threadIdx.y + blockIdx.y * blockDim.y;
unsigned int idx = iy * nx + ix;
printf("thread_id (%d,%d) block_id (%d,%d) coordinate (%d,%d) global index"
" %2d ival %2d\n", threadIdx.x, threadIdx.y, blockIdx.x, blockIdx.y,
ix, iy, idx, A[idx]);
}
int main(int argc, char **argv)
{
printf("%s Starting...\n", argv[0]);
// get device information
int dev = 0;
cudaDeviceProp deviceProp;
CHECK(cudaGetDeviceProperties(&deviceProp, dev));
printf("Using Device %d: %s\n", dev, deviceProp.name);
CHECK(cudaSetDevice(dev));
// set matrix dimension
int nx = 8;
int ny = 6;
int nxy = nx * ny;
int nBytes = nxy * sizeof(float);
// malloc host memory
int *h_A;
h_A = (int *)malloc(nBytes);
// iniitialize host matrix with integer
for (int i = 0; i < nxy; i++)
{
h_A[i] = i;
}
printMatrix(h_A, nx, ny);
// malloc device memory
int *d_MatA;
CHECK(cudaMalloc((void **)&d_MatA, nBytes));
// transfer data from host to device
CHECK(cudaMemcpy(d_MatA, h_A, nBytes, cudaMemcpyHostToDevice));
// set up execution configuration
dim3 block(4, 2);
dim3 grid((nx + block.x - 1) / block.x, (ny + block.y - 1) / block.y);
// invoke the kernel
printThreadIndex<<<grid, block>>>(d_MatA, nx, ny);
CHECK(cudaGetLastError());
// free host and devide memory
CHECK(cudaFree(d_MatA));
free(h_A);
// reset device
CHECK(cudaDeviceReset());
return (0);
}
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/wangzhenbang2023/cuda-learning.git
git@gitee.com:wangzhenbang2023/cuda-learning.git
wangzhenbang2023
cuda-learning
CudaLearning
master

搜索帮助