template.cu 5.22 KB

liyonghelpme 提交于 2020-11-08 23:26 . double速度慢效果差不多
// Copyright 1993-2015 NVIDIA Corporation. All rights reserved.
// Please refer to the NVIDIA end user license agreement (EULA) associated
// with this source code for terms and conditions that govern your use of
// this software. Any use, reproduction, disclosure, or distribution of
// this software and related documentation outside the terms of the EULA
// is strictly prohibited.
/* Template project which demonstrates the basics of how to set up a project
* example application.
* Host code.
*/
// includes, system
//#include <stdlib.h>
//#include <stdio.h>
//#include <string.h>
//#include <math.h>
//// includes CUDA
//#include <cuda_runtime.h>
//// includes, project
//#include <helper_cuda.h>
//#include <helper_functions.h> // helper functions for SDK examples
//// declaration, forward
//void runTest(int argc, char **argv);
//extern "C"
//void computeGold(JINGDU *reference, JINGDU *idata, const unsigned int len);
////! Simple test kernel for device functionality
////! @param g_idata input data in global memory
////! @param g_odata output data in global memory
//__global__ void
//testKernel(JINGDU *g_idata, JINGDU *g_odata)
// // shared memory
// // the size is determined by the host application
// extern __shared__ JINGDU sdata[];
// // access thread id
// const unsigned int tid = threadIdx.x;
// // access number of threads in this block
// const unsigned int num_threads = blockDim.x;
// // read in input data from global memory
// sdata[tid] = g_idata[tid];
// __syncthreads();
// // perform some computations
// sdata[tid] = (JINGDU) num_threads * sdata[tid];
// __syncthreads();
// // write data to global memory
// g_odata[tid] = sdata[tid];
// Program main
//main(int argc, char **argv)
// runTest(argc, argv);
//! Run a simple test for CUDA
//runTest(int argc, char **argv)
// bool bTestResult = true;
// printf("%s Starting...\n\n", argv[0]);
// // use command-line specified CUDA device, otherwise use device with highest Gflops/s
// int devID = findCudaDevice(argc, (const char **)argv);
// StopWatchInterface *timer = 0;
// sdkCreateTimer(&timer);
// sdkStartTimer(&timer);
// unsigned int num_threads = 32;
// unsigned int mem_size = sizeof(JINGDU) * num_threads;
// // allocate host memory
// JINGDU *h_idata = (JINGDU *) malloc(mem_size);
// // initialize the memory
// for (unsigned int i = 0; i < num_threads; ++i)
// {
// h_idata[i] = (JINGDU) i;
// }
// // allocate device memory
// JINGDU *d_idata;
// checkCudaErrors(cudaMalloc((void **) &d_idata, mem_size));
// // copy host memory to device
// checkCudaErrors(cudaMemcpy(d_idata, h_idata, mem_size,
// cudaMemcpyHostToDevice));
// // allocate device memory for result
// JINGDU *d_odata;
// checkCudaErrors(cudaMalloc((void **) &d_odata, mem_size));
// // setup execution parameters
// dim3 grid(1, 1, 1);
// dim3 threads(num_threads, 1, 1);
// // execute the kernel
// testKernel<<< grid, threads, mem_size >>>(d_idata, d_odata);
// // check if kernel execution generated an error
// getLastCudaError("Kernel execution failed");
// // allocate mem for the result on host side
// JINGDU *h_odata = (JINGDU *) malloc(mem_size);
// // copy result from device to host
// checkCudaErrors(cudaMemcpy(h_odata, d_odata, sizeof(JINGDU) * num_threads,
// cudaMemcpyDeviceToHost));
// sdkStopTimer(&timer);
// printf("Processing time: %f (ms)\n", sdkGetTimerValue(&timer));
// sdkDeleteTimer(&timer);
// // compute reference solution
// JINGDU *reference = (JINGDU *) malloc(mem_size);
// computeGold(reference, h_idata, num_threads);
// // check result
// if (checkCmdLineFlag(argc, (const char **) argv, "regression"))
// {
// // write file for regression test
// sdkWriteFile("./data/regression.dat", h_odata, num_threads, 0.0f, false);
// }
// else
// {
// // custom output handling when no regression test running
// // in this case check if the result is equivalent to the expected solution
// bTestResult = compareData(reference, h_odata, num_threads, 0.0f, 0.0f);
// }
// // cleanup memory
// free(h_idata);
// free(h_odata);
// free(reference);
// checkCudaErrors(cudaFree(d_idata));
// checkCudaErrors(cudaFree(d_odata));
// exit(bTestResult ? EXIT_SUCCESS : EXIT_FAILURE);
