From 9ddf0fb5784a579729d87cca6c9e50b86b05ff9c Mon Sep 17 00:00:00 2001 From: Alden <1732021810@qq.com> Date: Thu, 11 Sep 2025 07:05:34 +0000 Subject: [PATCH] 20250911-3 --- S1/ICVXKH/example_cudacode.py | 2 +- S1/ICVXKH/example_torchcode.py | 2 +- S1/ICVXKH/prompt.txt | 2 +- S1/ICVXKH/readme.md | 2 +- S1/ICVXKH/run_code.py | 2 +- ...\217\220\344\276\233\347\232\204\345\206\205\345\256\271.md" | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/S1/ICVXKH/example_cudacode.py b/S1/ICVXKH/example_cudacode.py index 3232300..f95d078 100644 --- a/S1/ICVXKH/example_cudacode.py +++ b/S1/ICVXKH/example_cudacode.py @@ -11,7 +11,7 @@ __global__ void relu_kernel(const float* x, float* y, int size) { int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < size) { y[idx] = fmaxf(x[idx], 0.f); - } + } } torch::Tensor relu_cuda(torch::Tensor x) { diff --git a/S1/ICVXKH/example_torchcode.py b/S1/ICVXKH/example_torchcode.py index 7e9d5f8..1322b74 100644 --- a/S1/ICVXKH/example_torchcode.py +++ b/S1/ICVXKH/example_torchcode.py @@ -11,7 +11,7 @@ class Model(nn.Module): def forward(self, x: torch.Tensor) -> torch.Tensor: """ - Performs matrix multiplication and applies ReLU activation. + Performs matrix multiplication and applies ReLU activation. Args: x (torch.Tensor): Input tensor of shape [batch_size, input_dim] diff --git a/S1/ICVXKH/prompt.txt b/S1/ICVXKH/prompt.txt index 0deaedc..4da4fbf 100644 --- a/S1/ICVXKH/prompt.txt +++ b/S1/ICVXKH/prompt.txt @@ -9,7 +9,7 @@ import torch import torch.nn as nn import torch.nn.functional as F - + class Model(nn.Module): def __init__(self) -> None: super().__init__() diff --git a/S1/ICVXKH/readme.md b/S1/ICVXKH/readme.md index 1447c24..787662d 100644 --- a/S1/ICVXKH/readme.md +++ b/S1/ICVXKH/readme.md @@ -6,4 +6,4 @@ example_cudacode.py:和torch对应的cuda代码 prompt.txt:利用LLM从torch代码生成cuda代码的prompt示例,(原始torch代码被附在prompt最后) -run_code.py:用于测试生成的cuda代码和原始torch输出是否一致以及加速情况的示例代码 +run_code.py:用于测试生成的cuda代码和原始torch输出是否一致以及加速情况的示例代码 diff --git a/S1/ICVXKH/run_code.py b/S1/ICVXKH/run_code.py index 24e8694..54ce4b2 100644 --- a/S1/ICVXKH/run_code.py +++ b/S1/ICVXKH/run_code.py @@ -9,7 +9,7 @@ from example_cudacode import ModelNew def run_benchmark(): # 检查 CUDA 是否可用 - if not torch.cuda.is_available(): + if not torch.cuda.is_available(): print("CUDA 不可用,请确保您有可用的 NVIDIA GPU 并已正确安装 PyTorch CUDA 版本。") return else: diff --git "a/S1/ICVXKH/\345\217\202\350\265\233\350\200\205\351\234\200\350\246\201\346\217\220\344\276\233\347\232\204\345\206\205\345\256\271.md" "b/S1/ICVXKH/\345\217\202\350\265\233\350\200\205\351\234\200\350\246\201\346\217\220\344\276\233\347\232\204\345\206\205\345\256\271.md" index cb75880..72586d9 100644 --- "a/S1/ICVXKH/\345\217\202\350\265\233\350\200\205\351\234\200\350\246\201\346\217\220\344\276\233\347\232\204\345\206\205\345\256\271.md" +++ "b/S1/ICVXKH/\345\217\202\350\265\233\350\200\205\351\234\200\350\246\201\346\217\220\344\276\233\347\232\204\345\206\205\345\256\271.md" @@ -1,4 +1,4 @@ 1. 根据example_torchcode.py的格式,提供一个torch实现的op,命名为torchcode.py 2. 仿照prompt.txt的写法,利用llm(deepseek、通义千问、GPT、Gemini等大模型)生成一个初始的cuda算子,按照example_cudacode.py的格式组织成一个可以运行的cuda op,命名为cudacode_ori.py,并且利用run_code.py 检查算子精度 3. 在符合精度要求的cudacode_ori.py基础上,进行cuda算子性能优化,用run_code.py检查算子精度和加速比,形成最终的最优性能的cuda算子实现,命名为cudacode_opt.py,格式符合example_cudacode.py -4. 针对每一个op,参赛者需要提供四个文件,torchcode.py、prompt.txt、cudacode_ori.py、example_cudacode.py \ No newline at end of file +4. 针对每一个op,参赛者需要提供四个文件,torchcode.py、prompt.txt、cudacode_ori.py、example_cudacode.py \ No newline at end of file -- Gitee