From 0f45d6d8586ddfde492e1b80b89ceec054fe7bc7 Mon Sep 17 00:00:00 2001 From: Alden <1732021810@qq.com> Date: Thu, 11 Sep 2025 06:44:54 +0000 Subject: [PATCH 1/3] 20250911 --- .../__pycache__/example_cudacode.cpython-310.pyc | Bin 0 -> 1587 bytes .../example_torchcode.cpython-310.pyc | Bin 0 -> 1480 bytes S1/ICVXKH/example_torchcode.py | 1 + 3 files changed, 1 insertion(+) create mode 100644 S1/ICVXKH/__pycache__/example_cudacode.cpython-310.pyc create mode 100644 S1/ICVXKH/__pycache__/example_torchcode.cpython-310.pyc diff --git a/S1/ICVXKH/__pycache__/example_cudacode.cpython-310.pyc b/S1/ICVXKH/__pycache__/example_cudacode.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d786c172788957527c1ff7387fbecd45edd20660 GIT binary patch literal 1587 zcma)6O>g5w7@l!#$8pkb`4S(ekpL;)28Tt0!!{cs6|pRcYNeJHGD0KQGilt8?Jyp1 zozkAR(w?~S1KMN%20z1BPW%P9@Xk0bS;2v^=HvZ(=6znjcCoSHBN)GCzs!S<8=P=6ySQuhhps3w(WC6Adg?X*ENGSoqnSD-Z9 z@(0lW2xzk`u4lvAh$HfGI1Ad)3;)9R4bg`W4}fPO+r%}!;j8-kHpjS9g}DiE>oBh} zV7P)arX}cCp*<B^BujAJ!|EwicSFVMsN52>9gF&sJo>0+`fTORx-$$M(AWN8~|3 z#%r23bgkr*$>bl9(VFLUIvv&_i&ab)N>0o>nGS;yS^k&Oyqa5ZRmq$zcc5Zt+yA>} zJAPy9yqccLPS^Dya2h+0GamJm#e$YaB@C7NM|nRV$!9oAv?C5r5q$VQO^jqfzxw*}*3kDn&c>~}+-HJY9V zO(BKP(F^O!x<(kO&Xs+RuR%i{<-+PRw2yA^UeIlvvRZH{J&0^P3ZGJ{C~d4fZ+cH- z2`m+tjeE+|qq&kAKd>6RK) zce{}Dm+&?6a=iaw{ABmp_uqU4A7-2_1gE-e2{?p{rR-~bn57HLCLUlPpS^a=IK0j7 zU({{z-qm66!J^O=J_oAD*0pt^_Z9hlBzs_n8ak%C8#jwpRtdwyFq0Z5u;e}18eGfQ zVQ-f=w5B^N0;|+lo3}HBgQ<&$26)Rcy1^pSh1Yovu-&oWvK$NR L$A;Z1E7`A6Tv$Ml)sER5f0mX1)q$(>e93oUvMe2uu5;YW|LCBhnz1a!#vDhAx zuF_K@^$+j^l4Je`Kf_l}J@>|m_w_`Zh4Nu(-?9DL&-?K_-flWg35;Kx-xjNokl%5! zIT$RyfSXk;pf~g3d@&D&pX{1-V|0>C%oxvTP^FVENs=aeBRY{cdR5oc*dWo zub%RvwAC-(x9y?Iz$Z5toI|_(&)kNIC6=C&7r}XOK`61|IXk5nup=T#_Q@rEoW(9O zJtU#=u1kcd+R6&yg0^+zy+Rj_vP!%7SXGA$n+1*;RnK(-r!T1+3sKgEF+yCDKkm*J zU881;zGx4+*>R_TnmyQix(jvR?{ulmZ2!~QquuYm|MKBXofOToR>Fc~QNoGs8zq8}IDm<#)kYa!ya7%%k8UcurvzV%fk0Ef^)l zHx7B&`|su#t>ncL$WZ3Z=C48S=|i)#ULouW|HK!z{jF+Er?VaYs9pA!TfgMpoSQ|l zRQ!j7!j=nRs+GFKD?B1()%=*Q!Jhb9|AxK^y~Drh?f*h|{>*@l`QgZzi#(ExL3yA$ zPpM;C)qNJ|F?3OHp_#1B6lQJ&YCSHr#O;BoG^MNQK-85l8>5g~kPZkb6{6{64`d5B z5#r~*sINRDA!Jty;fdEb5Ir6`rr*KE^h#W%s%toPoe2InXw1fo7MVAwn}jer32!sM z`wH5B{Io z_9;D77TywYV?F?=*BEa?W2!%d$-ic!-vjSvZ2dlN;8S+#HH{Dx3*Kmn=jQDpCcNJv z@JD!tqxl$2mJZbSpbpsaP1BgFkEZ=y%+=rBh~#PT;gZh>V((%<4fZ>M P5^o0V7Nd+N!G!$@C;M9Z literal 0 HcmV?d00001 diff --git a/S1/ICVXKH/example_torchcode.py b/S1/ICVXKH/example_torchcode.py index 7e9d5f8..55b5e00 100644 --- a/S1/ICVXKH/example_torchcode.py +++ b/S1/ICVXKH/example_torchcode.py @@ -14,6 +14,7 @@ class Model(nn.Module): Performs matrix multiplication and applies ReLU activation. Args: + x (torch.Tensor): Input tensor of shape [batch_size, input_dim] Returns: -- Gitee From b30d09292324a9e9cfeea1df11cfb684327066f2 Mon Sep 17 00:00:00 2001 From: Alden <1732021810@qq.com> Date: Thu, 11 Sep 2025 06:59:27 +0000 Subject: [PATCH 2/3] 20250911 --- S1/ICVXKH/example_cudacode.py | 1 + S1/ICVXKH/example_torchcode.py | 1 + S1/ICVXKH/prompt.txt | 1 + S1/ICVXKH/readme.md | 1 + S1/ICVXKH/run_code.py | 1 + 5 files changed, 5 insertions(+) diff --git a/S1/ICVXKH/example_cudacode.py b/S1/ICVXKH/example_cudacode.py index 3232300..201fc7c 100644 --- a/S1/ICVXKH/example_cudacode.py +++ b/S1/ICVXKH/example_cudacode.py @@ -14,6 +14,7 @@ __global__ void relu_kernel(const float* x, float* y, int size) { } } + torch::Tensor relu_cuda(torch::Tensor x) { auto size = x.numel(); auto y = torch::empty_like(x); diff --git a/S1/ICVXKH/example_torchcode.py b/S1/ICVXKH/example_torchcode.py index 55b5e00..5a3a4c9 100644 --- a/S1/ICVXKH/example_torchcode.py +++ b/S1/ICVXKH/example_torchcode.py @@ -14,6 +14,7 @@ class Model(nn.Module): Performs matrix multiplication and applies ReLU activation. Args: + x (torch.Tensor): Input tensor of shape [batch_size, input_dim] diff --git a/S1/ICVXKH/prompt.txt b/S1/ICVXKH/prompt.txt index 0deaedc..5f46ba9 100644 --- a/S1/ICVXKH/prompt.txt +++ b/S1/ICVXKH/prompt.txt @@ -18,6 +18,7 @@ class Model(nn.Module): return a + b + def get_inputs(): # randomly generate input tensors based on the model architecture a = torch.randn(1, 128).cuda() diff --git a/S1/ICVXKH/readme.md b/S1/ICVXKH/readme.md index 1447c24..be728f6 100644 --- a/S1/ICVXKH/readme.md +++ b/S1/ICVXKH/readme.md @@ -7,3 +7,4 @@ example_cudacode.py:和torch对应的cuda代码 prompt.txt:利用LLM从torch代码生成cuda代码的prompt示例,(原始torch代码被附在prompt最后) run_code.py:用于测试生成的cuda代码和原始torch输出是否一致以及加速情况的示例代码 + diff --git a/S1/ICVXKH/run_code.py b/S1/ICVXKH/run_code.py index 24e8694..d1d4ef7 100644 --- a/S1/ICVXKH/run_code.py +++ b/S1/ICVXKH/run_code.py @@ -15,6 +15,7 @@ def run_benchmark(): else: device = torch.device("cuda") + # 初始化模型 init_inputs = get_init_inputs() init_inputs = [ -- Gitee From d5140a8a8d1be97d5e77605bdf4d54711af228ba Mon Sep 17 00:00:00 2001 From: Alden <1732021810@qq.com> Date: Thu, 11 Sep 2025 07:02:54 +0000 Subject: [PATCH 3/3] 20250911-2 --- ...\217\220\344\276\233\347\232\204\345\206\205\345\256\271.md" | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git "a/S1/ICVXKH/\345\217\202\350\265\233\350\200\205\351\234\200\350\246\201\346\217\220\344\276\233\347\232\204\345\206\205\345\256\271.md" "b/S1/ICVXKH/\345\217\202\350\265\233\350\200\205\351\234\200\350\246\201\346\217\220\344\276\233\347\232\204\345\206\205\345\256\271.md" index cb75880..753dc87 100644 --- "a/S1/ICVXKH/\345\217\202\350\265\233\350\200\205\351\234\200\350\246\201\346\217\220\344\276\233\347\232\204\345\206\205\345\256\271.md" +++ "b/S1/ICVXKH/\345\217\202\350\265\233\350\200\205\351\234\200\350\246\201\346\217\220\344\276\233\347\232\204\345\206\205\345\256\271.md" @@ -1,4 +1,4 @@ 1. 根据example_torchcode.py的格式,提供一个torch实现的op,命名为torchcode.py 2. 仿照prompt.txt的写法,利用llm(deepseek、通义千问、GPT、Gemini等大模型)生成一个初始的cuda算子,按照example_cudacode.py的格式组织成一个可以运行的cuda op,命名为cudacode_ori.py,并且利用run_code.py 检查算子精度 3. 在符合精度要求的cudacode_ori.py基础上,进行cuda算子性能优化,用run_code.py检查算子精度和加速比,形成最终的最优性能的cuda算子实现,命名为cudacode_opt.py,格式符合example_cudacode.py -4. 针对每一个op,参赛者需要提供四个文件,torchcode.py、prompt.txt、cudacode_ori.py、example_cudacode.py \ No newline at end of file +4. 针对每一个op,参赛者需要提供四个文件,torchcode.py、prompt.txt、cudacode_ori.py、example_cudacode.py -- Gitee