diff --git a/S1/ICVXKH/example_cudacode.py b/S1/ICVXKH/example_cudacode.py index 3232300efee7e8feeb1170f2ea03d12b1295c238..4e61f119eec293c443cea69cfd129e6d66e0b2ff 100644 --- a/S1/ICVXKH/example_cudacode.py +++ b/S1/ICVXKH/example_cudacode.py @@ -17,7 +17,7 @@ __global__ void relu_kernel(const float* x, float* y, int size) { torch::Tensor relu_cuda(torch::Tensor x) { auto size = x.numel(); auto y = torch::empty_like(x); - const int block_size = 256; + const int block_size = 128; int num_blocks = (size + block_size - 1) / block_size; relu_kernel<<<num_blocks, block_size>>>(x.data_ptr<float>(), y.data_ptr<float>(), size); return y;