代码拉取完成,页面将自动刷新
#include <stdio.h>
__global__ void gpu_shared_memory(float *d_a)
{
// Defining local variables which are private to each thread
int i, index = threadIdx.x;
float average, sum = 0.0f;
//Define shared memory
__shared__ float sh_arr[10];
sh_arr[index] = d_a[index];
__syncthreads(); // This ensures all the writes to shared memory have completed
for (i = 0; i<= index; i++)
{
sum += sh_arr[i];
}
average = sum / (index + 1.0f);
d_a[index] = average;
sh_arr[index] = average;
}
int main(int argc, char **argv)
{
//Define Host Array
float h_a[10];
//Define Device Pointer
float *d_a;
for (int i = 0; i < 10; i++) {
h_a[i] = i;
}
// allocate global memory on the device
cudaMalloc((void **)&d_a, sizeof(float) * 10);
// now copy data from host memory to device memory
cudaMemcpy((void *)d_a, (void *)h_a, sizeof(float) * 10, cudaMemcpyHostToDevice);
gpu_shared_memory << <1, 10 >> >(d_a);
// copy the modified array back to the host memory
cudaMemcpy((void *)h_a, (void *)d_a, sizeof(float) * 10, cudaMemcpyDeviceToHost);
printf("Use of Shared Memory on GPU: \n");
//Printing result on console
for (int i = 0; i < 10; i++) {
printf("The running average after %d element is %f \n", i, h_a[i]);
}
return 0;
}
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。