From 45a690248b7c0d972511dc768937992e0d740ea7 Mon Sep 17 00:00:00 2001 From: zangyan Date: Mon, 14 Jul 2025 09:11:19 +0000 Subject: [PATCH 01/11] !65 update README.md. Merge pull request !65 from zangyan/r1.5.1 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6f58944..04bb336 100644 --- a/README.md +++ b/README.md @@ -232,7 +232,7 @@ HCCL软件包安装完成后,开发者可通过HCCL Test工具进行集合通 ## 相关文档 -HCCL提供了使用指南、环境变量参考、基于本源码仓进行定制的开发指南、算法分析工具使用指导等,详细可参见[HCCL资料书架总览](https://gitee.com/ascend/cann-hccl/wikis/HCCL%E8%B5%84%E6%96%99%E4%B9%A6%E6%9E%B6%E6%80%BB%E8%A7%88)。 +HCCL提供了用户指南、环境变量参考、基于源码仓进行算法与算子定制的开发指南等,详细可参见[HCCL资料书架总览](https://gitee.com/ascend/cann-hccl/wikis/HCCL%E8%B5%84%E6%96%99%E4%B9%A6%E6%9E%B6%E6%80%BB%E8%A7%88)。 ## 贡献指南 -- Gitee From 1a02ac41525904486020d8f9b535817065a9ea98 Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Fri, 18 Jul 2025 13:52:56 +0000 Subject: [PATCH 02/11] =?UTF-8?q?!70=20[HCCL=E7=AB=9E=E8=B5=9B]=20?= =?UTF-8?q?=E8=B5=9B=E9=A2=98=E6=A1=86=E6=9E=B6+=E8=B5=9B=E9=A2=98?= =?UTF-8?q?=E6=8C=87=E5=AF=BC+=E8=B5=9B=E9=A2=98FAQ=20*=20contest?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 6 + CMakeLists.txt | 2 + Dockerfile | 193 +++++++++ contest.md | 280 +++++++++++++ faq.md | 118 ++++++ img/offset_calc.jpg | Bin 0 -> 29402 bytes .../coll_all_reduce/CMakeLists.txt | 4 +- ...ll_custom_huge_all_reduce_mesh_executor.cc | 142 +++++++ ...oll_custom_huge_all_reduce_mesh_executor.h | 36 ++ ..._custom_medium_all_reduce_mesh_executor.cc | 142 +++++++ ...l_custom_medium_all_reduce_mesh_executor.h | 36 ++ ...l_custom_small_all_reduce_mesh_executor.cc | 142 +++++++ ...ll_custom_small_all_reduce_mesh_executor.h | 36 ++ .../algorithm/impl/operator/CMakeLists.txt | 1 + .../impl/operator/all_reduce_operator.cc | 2 +- .../operator/custom_all_reduce_operator.cc | 51 +++ .../operator/custom_all_reduce_operator.h | 31 ++ submit.sh | 42 ++ test/CMakeLists.txt | 11 +- 
test/algorithm/testcase/main.cc | 2 +- .../algorithm/testcase/testcase_all_reduce.cc | 392 +++++++++++++++++- 21 files changed, 1660 insertions(+), 9 deletions(-) create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 contest.md create mode 100644 faq.md create mode 100644 img/offset_calc.jpg create mode 100644 src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_huge_all_reduce_mesh_executor.cc create mode 100644 src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_huge_all_reduce_mesh_executor.h create mode 100644 src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_medium_all_reduce_mesh_executor.cc create mode 100644 src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_medium_all_reduce_mesh_executor.h create mode 100644 src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_small_all_reduce_mesh_executor.cc create mode 100644 src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_small_all_reduce_mesh_executor.h create mode 100644 src/domain/collective_communication/algorithm/impl/operator/custom_all_reduce_operator.cc create mode 100644 src/domain/collective_communication/algorithm/impl/operator/custom_all_reduce_operator.h create mode 100755 submit.sh diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4d70f07 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +.vscode +.idea +build +output +__pycache__ +*.log diff --git a/CMakeLists.txt b/CMakeLists.txt index c45e7ae..e1fe63f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,6 +3,8 @@ project(hccl) option(BUILD_OPEN_PROJECT "Build open hccl project." 
ON) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + if(BUILD_OPEN_PROJECT) include(cmake/config.cmake) add_subdirectory(src/domain/collective_communication) diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..ae431ab --- /dev/null +++ b/Dockerfile @@ -0,0 +1,193 @@ +# NOTE: Building this image requires docker version >= 18.0 + +ARG TARGETPLATFORM=linux/arm64 +ARG BASE_IMAGE=ubuntu:22.04 +ARG PYTHON_VERSION=3.10 + +# Stage 1: install build dependencies (noninteractive so apt never waits for input) +FROM ${BASE_IMAGE} AS base + +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ + apt-transport-https \ + ca-certificates \ + build-essential \ + bash \ + curl \ + git \ + wget \ + gcc \ + g++ \ + make \ + cmake \ + zlib1g \ + openssl \ + unzip \ + pciutils \ + net-tools \ + gfortran \ + patchelf \ + libblas3 \ + libblas-dev \ + libssl-dev \ + zlib1g-dev \ + libncurses5-dev \ + libbz2-dev \ + libreadline-dev \ + libsqlite3-dev \ + libffi-dev \ + libnss3-dev \ + libgdbm-dev \ + liblzma-dev \ + libev-dev \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* \ + && rm -rf /var/tmp/* \ + && rm -rf /tmp/* + +# Stage 2: install Conda (installer is picked by target architecture) +FROM base AS conda-installer + +ARG TARGETPLATFORM +ARG PYTHON_VERSION + +RUN case ${TARGETPLATFORM} in \ + "linux/arm64") ARCH=aarch64 ;; \ + *) ARCH=x86_64 ;; \ + esac && \ + curl -fsSL -o /tmp/miniconda.sh "https://mirrors.tuna.tsinghua.edu.cn/anaconda/miniconda/Miniconda3-latest-Linux-${ARCH}.sh" + +RUN chmod +x /tmp/miniconda.sh && \ + bash /tmp/miniconda.sh -b -p /opt/conda && \ + rm /tmp/miniconda.sh && \ + /opt/conda/bin/conda install -y python=${PYTHON_VERSION} && \ + /opt/conda/bin/conda clean -ya + +# Stage 3: install CANN 8.2.RC1.alpha003 +FROM conda-installer AS cann-installer + +ARG TARGETPLATFORM +ENV PATH=/opt/conda/bin:${PATH} + +RUN pip install --no-cache-dir -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple \ + attrs cython numpy==1.24.0 decorator sympy cffi pyyaml pathlib2 \ + psutil protobuf==3.20 scipy requests absl-py + +RUN case ${TARGETPLATFORM} in \ + 
"linux/arm64") ARCH=aarch64 ;; \ + *) ARCH=x86_64 ;; \ + esac && \ + CANN_TOOLKIT_URL="https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C22B800TP052/Ascend-cann-toolkit_8.2.RC1.alpha003_linux-${ARCH}.run" && \ + curl -fsSL -o /tmp/Ascend-cann-toolkit.run "${CANN_TOOLKIT_URL}" && \ + CANN_COMMUNITY_SDK_URL="https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C22B800TP052/Ascend-cann-communitysdk_8.2.RC1.alpha003_linux-${ARCH}.run" && \ + curl -fsSL -o /tmp/Ascend-cann-communitysdk.run "${CANN_COMMUNITY_SDK_URL}" + +# Install CANN Toolkit +RUN chmod +x /tmp/Ascend-cann-toolkit.run && \ + /tmp/Ascend-cann-toolkit.run --quiet --install && \ + rm /tmp/Ascend-cann-toolkit.run + +# Install Community SDK +RUN chmod +x /tmp/Ascend-cann-communitysdk.run && \ + /tmp/Ascend-cann-communitysdk.run --quiet --full && \ + rm /tmp/Ascend-cann-communitysdk.run + +# Stage 4: download the HCCL build dependencies (nlohmann/json headers, MPICH) +FROM cann-installer AS hccl-installer + +WORKDIR /workspace + +RUN curl -fsSL -o /tmp/include.zip https://github.com/nlohmann/json/releases/download/v3.11.2/include.zip && \ + unzip -d /workspace/nlohmann_json /tmp/include.zip && \ + rm /tmp/include.zip + +# Build and install MPICH from source into /workspace/mpich +RUN curl -fsSL -o /tmp/mpich.tar.gz https://www.mpich.org/static/downloads/3.2.1/mpich-3.2.1.tar.gz && \ + tar -zxf /tmp/mpich.tar.gz -C /workspace && \ + cd /workspace/mpich-3.2.1 && \ + ./configure --disable-fortran --prefix=/workspace/mpich --with-device=ch3:nemesis && \ + make && make install && \ + rm -r /workspace/mpich-3.2.1 && \ + rm /tmp/mpich.tar.gz + +# Set environment variables (appended to root's .bashrc) +RUN \ + # Conda environment variables + echo 'export PATH=/opt/conda/bin:${PATH}' >> /root/.bashrc && \ + # NPU driver environment variables + echo 'export LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64/driver:${LD_LIBRARY_PATH}' >> /root/.bashrc && \ + echo 'export LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64/common:${LD_LIBRARY_PATH}' >> /root/.bashrc && \ + # CANN Toolkit environment variables + echo 'source /usr/local/Ascend/ascend-toolkit/set_env.sh' >> 
/root/.bashrc && \ + # MPICH environment variables + echo 'export PATH=/workspace/mpich/bin:${PATH}' >> /root/.bashrc && \ + echo 'export LD_LIBRARY_PATH=/workspace/mpich/lib:${LD_LIBRARY_PATH}' >> /root/.bashrc + +# Stage 5: install SSH (only sshd_config and /start.sh are copied out of this stage) +FROM base AS ssh-installer + +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ + openssh-server + +# SSH configuration +RUN echo "PasswordAuthentication yes" >> /etc/ssh/sshd_config && \ + echo "PermitRootLogin yes" >> /etc/ssh/sshd_config && \ + echo "PermitUserEnvironment yes" >> /etc/ssh/sshd_config && \ + echo "ClientAliveInterval 60" >> /etc/ssh/sshd_config && \ + echo "ClientAliveCountMax 3" >> /etc/ssh/sshd_config + +# SSH start-up script; sshd is exec'ed so it replaces the shell and receives container stop signals +RUN echo '#!/bin/bash' > /start.sh && \ + echo 'if [ -n "${ROOT_PASSWD}" ]; then' >> /start.sh && \ + echo ' echo "root:${ROOT_PASSWD}" | chpasswd' >> /start.sh && \ + echo 'fi' >> /start.sh && \ + echo 'mkdir -p /var/run/sshd' >> /start.sh && \ + echo 'ssh-keygen -A' >> /start.sh && \ + echo 'exec /usr/sbin/sshd -D -e' >> /start.sh && \ + chmod +x /start.sh + +# Final stage: install runtime dependencies and copy the results of the earlier stages +FROM ${BASE_IMAGE} AS official + +ENV ROOT_PASSWD=change_me + +SHELL [ "/bin/bash", "-c" ] + +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ + apt-transport-https \ + ca-certificates \ + bash \ + libc6 \ + libsqlite3-dev \ + git \ + gcc \ + g++ \ + gdb \ + make \ + cmake \ + file \ + vim \ + netcat \ + curl \ + wget \ + openssh-server \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* \ + && rm -rf /var/tmp/* \ + && rm -rf /tmp/* + +COPY --from=cann-installer /opt/conda /opt/conda +COPY --from=cann-installer /etc/Ascend /etc/Ascend +COPY --from=cann-installer /usr/local/Ascend /usr/local/Ascend +COPY --from=hccl-installer /root/.bashrc /root/.bashrc +COPY --from=hccl-installer /workspace /workspace +COPY --from=ssh-installer /etc/ssh/sshd_config /etc/ssh/sshd_config +COPY --from=ssh-installer /start.sh /start.sh + +EXPOSE 22 + +WORKDIR /workspace + +CMD [ "/start.sh" ] diff 
--git a/contest.md b/contest.md new file mode 100644 index 0000000..26b6e9f --- /dev/null +++ b/contest.md @@ -0,0 +1,280 @@ +# HCCL 通信库创新大赛操作指导 + +## 0. 赛前须知 + +### 0.1 技能要求 + +1. 熟悉 C++14 编程语言 +2. 了解 GDB、LLDB 等调试工具 +3. 了解 VSCode、CLion 等 IDE 开发工具 +4. 了解 AllReduce 等集合通信原语 + +### 0.2 资料 + +HCCL 资料: + +- [昇腾社区官网][1] +- [HCCL——昇腾社区][2] +- [HCCL概述——昇腾社区][3] +- [集合通信原语——昇腾社区][4] +- [HCCL代码仓][5] +- [HCCL Wiki][6] + +定制算法开发指南: + +1. [HCCL源码定制开发指南][7] +2. [AllGather 定制算法实现][8] +3. [HCCL 通信库创新大赛参赛 FAQ](./faq.md) + +[1]: https://www.hiascend.com +[2]: https://www.hiascend.com/hccl +[3]: https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/82RC1alpha003/hccl/hcclug/hcclug_000001.html +[4]: https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/82RC1alpha003/hccl/hcclug/hcclug_000004.html +[5]: https://gitee.com/ascend/cann-hccl +[6]: https://gitee.com/ascend/cann-hccl/wikis +[7]: https://gitee.com/ascend/cann-hccl/blob/master/docs/hccl_customized_dev/README.md +[8]: https://gitee.com/ascend/cann-hccl/pulls/64 + +### 0.3 评分标准 + +组委会将从功能、性能、代码风格 3 个维度对参赛代码进行综合评测,评测公式: + +- 15 分功能分:15 个算法分析器用例,每个 1 分,通过得 1 分,不通过得 0 分 + + > 5 种数据量:512k/2m/64m/1g/4g,3 种数据格式:int8/fp16/fp32 + +- 75 分性能分:3 个 HCCLTest 用例,每个 25 分,不通过得 0 分,通过则按照性能计分,性能最佳得满分,按照排名依次递减 + + > 3 种数据量:512k/2m/64m,1 种数据格式:fp32 + > + > 性能标准:基于 HCCLTest 工具测试的带宽使用量(字段:`alg_bandwidth(GB/s)`)作为评判标准,数值越高越好 + +- 10 分主观分:代码风格 + +> 【注意】验证方法详见 [算法分析器验证](#71-算法分析器验证)、[HCCLTest工具验证](#72-hccltest-工具验证) + +## 1. 登录环境 + +选手开发环境信息将通过邮件的方式发送至队长邮箱,队伍成员可通过 SSH 进入选手开发环境: + +```bash +ssh root@ip -p port +``` + +## 2. 环境目录 + +选手开发环境是运行在物理机上的 Docker 容器,目录结构如下: + +``` +|-- /dev +| |-- davinci1 # NPU1 +| `-- davinci2 # NPU2 +|-- /etc/Ascend +| `-- ascend_cann_install.info # CANN 安装信息 +|-- /usr/local/Ascend +| |-- ascend-toolkit # CANN Toolkit 安装目录 +| `-- driver # NPU 驱动安装目录 +`-- /workspace + |-- cann-hccl # HCCL 代码仓,选手需自行下载 + |-- mpich # MPICH 安装目录 + `-- nlohmann_json # nlohmann json include 目录 +``` + +## 3. 软件版本 + +> 【注意】 +> +> 1. 
选手开发环境中已安装下列软件依赖 +> 2. 最终评测环境的软件版本与选手开发环境一致 + +- gcc 11.4.0 +- g++ 11.4.0 +- make 4.3 +- cmake 3.22.1 +- mpich 3.2.1 +- CANN Toolkit 8.2.RC1.alpha003 +- CANN Community SDK 8.2.RC1.alpha003 + +## 4. 代码开发 + +### 4.1 下载代码 + +> 【注意】选手只需下载 [ascend/cann-hccl](https://gitee.com/ascend/cann-hccl.git) 代码仓即可,编译运行所需全部依赖已提前安装 + +```bash +cd /workspace + +git clone https://gitee.com/ascend/cann-hccl.git -b r1.5.1 +``` + +### 4.2 IDE 远程开发 + +推荐选手基于 VSCode、CLion 等 IDE,通过 SSH 连接开发环境进行远程开发,参考文档: + +- [VSCode 使用 SSH 远程开发](https://code.visualstudio.com/docs/remote/ssh) +- [CLion 使用 SSH 远程开发](https://www.jetbrains.com/help/clion/remote-development.html) + +### 4.3 定制算法开发 + +在 HCCL 软件架构中,`Operator` 负责算法选择,`Executor` 负责算法编排。为简化流程,选手只需实现以下内容: + +1. [custom_all_reduce_operator.cc](src/domain/collective_communication/algorithm/impl/operator/custom_all_reduce_operator.cc) 中编写算法选择逻辑 +2. [coll_custom_small_all_reduce_mesh_executor.cc](src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_small_all_reduce_mesh_executor.cc) 中编写小数据量(512K)场景的 AllReduce 算法 +3. [coll_custom_medium_all_reduce_mesh_executor.cc](src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_medium_all_reduce_mesh_executor.cc) 中编写中等数据量(2M)场景的 AllReduce 算法 +4. [coll_custom_huge_all_reduce_mesh_executor.cc](src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_huge_all_reduce_mesh_executor.cc) 中编写大数据量(64M)场景的 AllReduce 算法 + +> 【注意】上述代码文件中,选手需要实现的内容已在代码注释中标明 + +## 5. 编译代码 + +编译所需的依赖项均已安装,在 HCCL 代码仓执行编译即可: + +```bash +cd /workspace/cann-hccl + +bash build.sh --nlohmann_path /workspace/nlohmann_json/include +``` + +## 6. 安装编译结果 + +编译生成的 HCCL 软件包在 `/workspace/cann-hccl/output` 目录下: + +```bash +cd /workspace/cann-hccl/output + +./CANN-hccl_alg-8.2.t12.0.b077-linux.aarch64.run +``` + +安装完成后,用户编译生成的 HCCL 软件包会替换已安装 CANN 开发套件包中的 HCCL 相关软件 + +## 7. 
测试代码 + +### 7.1 算法分析器验证 + +> 【注意】算法分析器能够在无昇腾 NPU 场景下离线测试算法逻辑,包括:死锁检测、资源校验、内存冲突校验等 + +编译并执行算法分析器用例: + +```bash +cd /workspace/cann-hccl + +# 编译测试用例 +bash build.sh --nlohmann_path /workspace/nlohmann_json/include --test --open_hccl_test + +# 执行测试用例 +./build/test/open_hccl_test +``` + +### 7.2 HCCLTest 工具验证 + +> 【注意】性能测试场景可使用 HCCL Test 工具进行验证,该工具基于真实 NPU 设备进行功能和性能测试 + +基于 HCCL Test 工具在 NPU 设备上执行验证: + +```bash +cd /usr/local/Ascend/ascend-toolkit/latest/tools/hccl_test + +# 编译 HCCL 性能测试工具 +make MPI_HOME=/workspace/mpich ASCEND_DIR=/usr/local/Ascend/ascend-toolkit/latest + +# 执行 HCCL Test +# 512K +mpirun -n 2 ./bin/all_reduce_test -b 512k -e 512k -d fp32 -o sum -p 2 +# 2M +mpirun -n 2 ./bin/all_reduce_test -b 2m -e 2m -d fp32 -o sum -p 2 +# 64M +mpirun -n 2 ./bin/all_reduce_test -b 64m -e 64m -d fp32 -o sum -p 2 +``` + +> 工具详细说明可参考:[昇腾文档中心-HCCL 性能测试工具使用指南](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/82RC1alpha003/devaids/hccltool/HCCLpertest_16_0001.html) + +## 8. 提交代码 + +执行下列脚本,将选手代码拷贝到 `/result` 目录下 + +```bash +bash /workspace/cann-hccl/submit.sh +``` + +该脚本将选手编写的定制算法文件拷贝至 `/result` 目录下,用于后续评测: + +1. `custom_all_reduce_operator.h` +2. `custom_all_reduce_operator.cc` +3. `coll_custom_small_all_reduce_mesh_executor.h` +4. `coll_custom_small_all_reduce_mesh_executor.cc` +5. `coll_custom_medium_all_reduce_mesh_executor.h` +6. `coll_custom_medium_all_reduce_mesh_executor.cc` +7. `coll_custom_huge_all_reduce_mesh_executor.h` +8. `coll_custom_huge_all_reduce_mesh_executor.cc` + +## 9. 结果公布 + +赛程结束后统一公布成绩 + +> 【注意】选手开发环境与最终评测环境完全一致 + +## 10. 调试代码 + +### 10.1 日志 + +#### 10.1.1 日志打印 + +选手可通过调用日志宏保存日志到文件中,便于调试: + +```c++ +HCCL_DEBUG("[HCCL_CONTEST] Orchestrate start"); +HCCL_INFO("[HCCL_CONTEST] Total count: %u", totalCount); +HCCL_WARNING("[HCCL_CONTEST] Cost: %u ms", cost); +``` + +#### 10.1.2 日志设置 + +1. 日志级别 + +HCCL 日志级别默认为 Error,下面通过环境变量设置为 Info 级别: + +```bash +export ASCEND_GLOBAL_LOG_LEVEL=1 # 0: debug, 1: info, 2: warn, 3: error +``` + +2. 
日志目录 + +设置日志存储目录: + +```bash +export ASCEND_PROCESS_LOG_PATH=/workspace/log # 默认为:$HOME/ascend/log +``` + +设置日志输出到控制台: + +```bash +export ASCEND_SLOG_PRINT_TO_STDOUT=1 +``` + +3. 日志数量 + +设置每个进程最多保留的日志数量为较大数字,以防丢失: + +```bash +export ASCEND_HOST_LOG_FILE_NUM=1000 +``` + +### 10.2 Core dump 问题 + +使用 gdb 调试: + +> 【注意】选手本地开发编译 HCCL 代码时默认已开启 `-O0 -g` 编译选项,但最终评测时会开启 `-O3` + +```bash +cd /usr/local/Ascend/ascend-toolkit/latest/tools/hccl_test + +# 512K +gdb --args mpirun -n 2 ./bin/all_reduce_test -b 512k -e 512k -d fp32 -o sum -p 2 +# 2M +gdb --args mpirun -n 2 ./bin/all_reduce_test -b 2m -e 2m -d fp32 -o sum -p 2 +# 64M +gdb --args mpirun -n 2 ./bin/all_reduce_test -b 64m -e 64m -d fp32 -o sum -p 2 +``` + +### 10.3 Wrong answer 问题 + +请选手仔细排查定制算法是否符合 AllReduce 算法逻辑 diff --git a/faq.md b/faq.md new file mode 100644 index 0000000..f80fdef --- /dev/null +++ b/faq.md @@ -0,0 +1,118 @@ +# 一、开发环境 + +#### 1. 在我的开发环境中还需要自行安装工具包吗? + +选手环境已提前安装所有软件依赖,选手只需下载 cann-hccl 源码即可,下载方法详见 [参赛指导](./contest.md)。 + +#### 2. 我的开发环境是否有可能挂掉?代码会丢失吗? + +有可能,但概率非常小。开发环境遇到任何问题请及时寻求赛事工作组人员帮助。 + +#### 3. 开发环境中的 NPU 设备会存在多支队伍共用导致资源竞争吗? + +不会,每个队伍的开发环境中的 NPU 设备都是独占的,不会存在冲突。 + +# 二、算法开发 + +#### 1. execMem结构体和param结构体中都有count、inputPtr、outputPtr变量,有什么区别? + +* param中保存的是本次调用算子的数据,count是本次调用算子在一个rank上总共要存放的数据数量,inputPtr和outputPtr是起始输入输出内存块的指针。 +* 由于CCL_Output buffer的大小有限,需要循环多次中转UserInput数据。每次循环的起始位置都做了一个CCL_Output大小的偏移量。所以execMem维护的是当前循环已经经过偏移的指针位置,count也是本次循环要搬运的数据数量。 + +#### 2. 为什么execMem.outputPtr是已经偏移后的内存指针,在跨卡搬运远端CCL_Output至本地Output时计算目的内存地址还要再加一个偏移值? + +跨卡搬运远端CCL_Output至本地Output过程中计算目的内存地址公式: + +```c++ +dst = DeviceMem::create(execMem.outputPtr + dstRank * param.DataDes.count * unitSize, curSize); +``` + +* 因为execMem.outputPtr中已经加上的偏移是每个rank在output区域相对于自己上一次循环使用的地址的offset。(rank内偏移) +* 而dstRank * param.DataDes.count * unitSize是rank之间在OutputPtr区域上的相对偏移。(rank间偏移) + +![image](img/offset_calc.jpg) + +【例】在算到rank 1在rank 0上的第二个Output指针时要先偏移Rank 0的Output 1 + Output 2 + Output 3,再偏移Rank 1的Output 1。 + +#### 3. 
为什么allgather mesh算法实现中不需要ccl_input buffer但在rank本地搬运建链时src内存类型却是ccl input? + +```c++ +CHK_RET(CalcCommPlaneInfo(tag_, commParaLevel0, opTransport[COMM_LEVEL0], + TransportMemType::CCL_INPUT, + TransportMemType::CCL_OUTPUT)); +``` + +因为单算子模式时userinput mem就是CCL_Input buffer,图模式时这两个变量的值才有区别。 + +#### 4. 在跨rank搬运数据,循环遍历除本端rank外所有远端rank时如何得到每个远端rank编号? + +`u32 dstRank = (level0CommInfo.localRank + round + 1) % level0CommInfo.localRankSize;` + +以单机8卡,localRank是rank0为例演示计算过程,如下表所示。round从0\~6(共7次循环),恰好覆盖除自身(0)以外的所有节点(1\~7)。 + +| round | 计算过程 | dstRank | 含义 | +| :---: | :-----------------: | :-----: | :-----------: | +| 0 | (0 + 0 + 1) % 8 = 1 | 1 | 与节点 1 通信 | +| 1 | (0 + 1 + 1) % 8 = 2 | 2 | 与节点 2 通信 | +| 2 | (0 + 2 + 1) % 8 = 3 | 3 | 与节点 3 通信 | +| 3 | (0 + 3 + 1) % 8 = 4 | 4 | 与节点 4 通信 | +| 4 | (0 + 4 + 1) % 8 = 5 | 5 | 与节点 5 通信 | +| 5 | (0 + 5 + 1) % 8 = 6 | 6 | 与节点 6 通信 | +| 6 | (0 + 6 + 1) % 8 = 7 | 7 | 与节点 7 通信 | + +#### 5. 在跨卡传输数据时,从流上在传输前后要进行前同步和后同步,目的是什么? + +* 前同步:确保双方进入 “传输准备” 状态(避免一方已发送,另一方未就绪)。 +* 后同步:确保数据拷贝完成后,再执行后续操作(避免竞态条件)。 + +#### 6. 为什么跨卡传输数据前后需要本卡的主从流都同步一次? + +* 传输前:主流要通知每个从流准备开始工作。每条从流要回复主流准备好了。 +* 传输后:每条从流要通知主流数据搬运结束。主流要回复从流收到。 + +#### 7. 实现allreduce executor时可以继承非reduce相关类吗? + +可以,按照自己的实现思路按需继承即可。 + +#### 8. 由于在实现算法编排功能时会用到暂未开源的HCCL平台层接口,以下是比赛可能会用到的编排接口范围 + +| 接口名称 | +| ------------------ | +| HcclD2DMemcpyAsync | +| HcclReduceAsync | +| HcclReduceScatter | +| HcclAllGather | +| HcclReduce | +| HcclBroadcast | + +接口详细信息请参考: +[HCCL接口列表1](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/82RC1alpha002/API/hcclapiref/hcclcpp_07_0001.html) +[HCCL接口列表2](https://gitee.com/ascend/cann-hccl/blob/master/docs/hccl_customized_dev/HcclD2DMemcpyAsync.md) + +#### 9. 在实现allreduce mesh算法时若用到HcclReduceAsync方法需注意跨rank搬运时要使用sdma协议,rdma协议暂不支持。 + +# 三、算法调试 + +#### 1. 
算法分析器在检查mesh结构下带reduce的算子是否有内存冲突时可能误报。 + +解决方法:确认无内存冲突后手动关闭内存冲突校验功能:`checker.CloseRankMemCheck();` + +详情见:[集合通信源码定制开发指南](https://gitee.com/ascend/cann-hccl/wikis/HCCL%E8%B5%84%E6%96%99%E4%B9%A6%E6%9E%B6%E6%80%BB%E8%A7%88) + +#### 2. 使用算法分析器调试如何获得更多算法执行信息?怎么看? + +可以在校验时把算法执行时的Task序列打印功能打开:`checker.EnableTaskPrint();` + +检查以下字段是否符合预期: + +* srcSlice、dstSlice:src是要被搬运的数据在哪,dst是要把数据搬到哪。上图蓝色划线表示rank 0在把本卡UserInput buffer中的数据往本卡CCL_Output buffer搬运的两次循环,绿色划线表示rank 0在把本卡CCL_Output buffer往本卡Output buffer搬运的两次循环。循环两次是由于 UserInput buffer中的数据量大于CCL_Output buffer的大小。 +* BufferType:内存类型,比如UserInput/CCL_Output/Output buffer。 +* offset: + * 偏移,用于表示内存指针指向的变化。实际调试过程中出现内存越界、内存冲突原因是偏移计算错误的概率很大。 + * 可以看出绿色划线的CCL_Output buffer的offset一直是0,这是由于用于中转的CCL_Output内存地址一直是固定的。而UserInput和Output buffer的offset一直在随循环递增,每次增加的大小就是CCL_Output的大小。 +* size:内存块大小,最后一次循环的size是尾块数据。大小取决于UserInput是否能被CCL_Output整除。 + +#### 3. AllReduce的算法实现注意输入输出的tensor shape要一致。 + +假设rank 0数据为:[1, 2, 3, 4],rank 1数据为:[5, 6, 7, 8], +则经过allreduce后两张卡上的数据都应该是:[6, 8, 10, 12]而非[36]。(注意看初赛题的说明图) diff --git a/img/offset_calc.jpg b/img/offset_calc.jpg new file mode 100644 index 0000000000000000000000000000000000000000..5d94577d844b358aeb4b5a5e606a1da5410c2180 GIT binary patch literal 29402 zcmc$G2|Sc<+xBgjC6pygjVzHhdxViflI;6dse}+^%Q8h$Bq3y-vhT7^gehxCcCzpL zI@V#f@9zIR75cx=^Stl-eZNo5?>EbRU+a0D*LfW0aovRPgi(P0vZ}f&Ktx0YTm%0B zgaP0JKtfFX<3IRE3jQafBqJjwC8MICAg83EqM@OtqNb)je1x9%FvDSLYIr{HpC6n`A{j*?rs;4_nM`znvb4Hv&dRLm^LSlM__@$sJ)kd%^^IV&r7;i8JF z+9mbNdin;2M#d(l);Dk6wz*?#cmIK_o4beSL;oiMPXnJl4+@WX9T^q~yt<~guD;<*V^c?GS9eeE_rCtI@rlW)>6zKNdDPna#^%;GdS~}X zzlZ>me{}0t&kpp90qhqsDJcml#gBdw5xavg5(ZMTlVapY&g)QIcRtD`{+Nlj`{tMdiJYhzx&k>(2x*;lSje;KmnXyMl@bA zg#b9i;`B?75P*g*__FA_9S{}7|CZ`u1J+Lzx!_L#W>4iAC|Kjqvy5zhCIA~pjrf72 zAbw(A0uY)B9kiP67QTo?ZSrA)vHhuUiT}D6iuDtME%?D^xxiZ?`18!r&Ck#c2Jp^| zIJ{yqSS2#HtK=xGp$oPwvu^rVjl`L=1YnJ_fB@tjA^>lL%Jes2JEX=0pqG>Yw5b5r z-fHO3v5_ZWT7mLA~>BQLLsH(Z^f6U9%$3crnfwY`1h 
zl|lUVqc@hnsZ#l3b1(z#CenCy#2N>#-@CbW#jtP(;S{qqII_S=0K9(;XKMa)0$|@7 zVtEqnKE7nC0qfVRv7%MU$}zQhlIwl$M1i{y+~&w3QM0?@11oN~NY&n}O}%eEB47sV zhm0fY6fTT1A)YvGw9*Zsw`=4AhBF+6YI}MjE^?92(fchGi8BU&HMsrYa4pGYx&%vT z&r8cVmCa6#?|gTlo*T#sriHC&P1lmvBGHwjS_L1Ct z)#pF$hlCWL^J=ybG=84uclBdo6Ah{2_3#=Gt<8S|KjN)t zKhe184TU_D%%}gHrjq`p?GhVbknxQja*kWh5!=tLBMsK9`sl`CL3nN~BieQqF?Kse z(ssj4r5ptd_I{SmW-jV1sAlWLku}HrHR}-X>m2)5p?f7vEnfD%X5NEc`Lh%GY0>2s z>TAV`QPapF@+!Rbii03t!3zJeg#aX*Rz!?Lmcmx;$BaT6p2+J>d=!?bg?>C@`iw8; znuxECP*{Bo*bCI_Etno?-9jYrgfu5 z=x7T|J+!c{J~^jvz^*q@?qk=A`~`VbpiGCt*Y@`jvR|kGU{!YXW{-=*eW|Pcu{6!H?k$gjHM#z=Y;j1ipf^Uu%iroW2ZinK{Jk|uh3=4A!Fnk`r=V&6mde+F0AR5Lu+eic)B{f=02_pbd=@Ind5v;}4->&Q56~%Y?<;I-bEfL{v`PZNB9D61G z5;Nbsw=A|jZtvi*BvoN9!@yCohJk~U|6@>Ous&+|37FKsACvtfoSmxn@WDs%Xy*CE z%hx1nF0rfmY~~*Dm7eZPQmgPh#$dpuo-=ti^81#K_Gml|E+{DG188FQm7CcO*d zuWOy1>$>gRh9q`fc^;L>hN642=^&$m4jU7!t8ClUcysg8rL!iv=>jG%#M3SSDXZsh zLhdC6qz$dCcVYv5LmiA7m%Rg1eOqB1SkZAPyX`I{*qc4gL#=@=vc4c=q@<*_zaD;H zo=1bJpt^pRk}_O9oMpN%?EL~3f{U)WwYqJH=5>W-&FZE<#wujYv5XIXomO{br8-ua zU8!0VK0vuh>vR66(P!QXEwg`6xWmhzx~6+4Mg*MZw0;L{|CQA_#X}wm1mMmHqqh>< z%LrUV7FrofSHs-ry_sNROD*ox_U8IJ1s}9#xFY`(vH%%U#q2>n){{L(>r}IAR2NEZ z#!kJX6o-+pD75Vx?wP7~;yg{pv$lQOoTxo|(Ud>5Jlq2MO^X0HhWm8BmrPkE0J&Vq zt!~FwgUrWcpK~;mtGCnYdr&bgwe*6mZ0ePWC$C-$eJt`bP(SvCX1co4*x=`5GuAO#on_8*X@T;X4)!oJRJX$M5{Lc2Z{ePCI@H(oyH4+q=H@uh97^ zCSFGDzC}Bq8}FVrj&Z2_pm=ZS&9TWH+s>JZ7ltK!9nfoF|59w9vLXPjEeJdbf&j1> z@xP_PJj<^rh>N?wp0DACUj^HivfmeEF2lM*w>t@dGLtZ%xBs`~CL{rX$2nmsobV!f zI!dh4&ckuRO9vN$a+5%GpkL?wm0T7^v#ynwRL|M&V5dKRNy^NLV6HlD+4-H4BKvd zd+1I^RW}88D-^%W_^Tkm3cVYFtHV3!b{p)iE%kQ|#rjru{&+7n0^mcX__F{a@=yZ6 z84TGSh3`^kN5WIJ7#~{1%0J6{|5o5D5yhSHT--l-OV?CKSsy9fYv}6jg#4;&r*$hbk{6x3im}d~Q0ZAOp23^V zMJ8xHPV`kM3CQTRZXf_-x*qKKC#`g_(JVSVS14LPksREsBx**`0$>#SEH5u;T^4?S zz12Z)2R`fqU(J-K1+Bv-5V>jqZUzi$SOF~(SX<243YO_S0jM=HQsHR*;AIE@ieGl^3Bt}?u%Bl?RP~r&!@L3;%RdIcw^A!_V znu%>)k<~gl0?BwLzQ@SfE34ahs<+_oDVUw~(q>ShL~9!6*>tcZ$ieZkuwcBeIJT4p zlfZn_J6O^n%(_LqRi&-!q_J(!D`VpUbTWN^e|d?n 
zu6j%qqr*?A=n;TJ%JE_(`0J*w5851>^+p=)t{Oi-Esg8lNgnhUfR%S@{a{xU+X=vk@*~cDOSoup=DDM2@TEtR zpE8AHPngn#)2K}}PJVzP%D=@}QIDv8 z-2E>RkNI2Ts(ZLJ=w!AxXtnP}K+l=HQNFAAH(xg8e`-B{D{o{ z%a`Rl3?CRC!fW9eHqZ-dn!0-WJPcffgRvd#2yD^T3MXaT-5H3E*^q@`z9Op$fTULm zqmISQK$tlJ$cC(>6=bmjxyIH}wAS0H!}g5K?dE_@&W?dnDX387dYJILlB zY&IqPHSo)u>X-bCCiRJg8FiG%e!PAG$hZ3)eMXPqN-=U*v&u~r`w@{1LnyH^8tjWV z_wQD-Ko4}1!G{2tLkPfmDVIW5;}z9t56khPh(_;#$B(&fSE_RN?@VoEO#=HI_&#S( zB=-vzM-?4xjP8G@BmlJLz3A3NPTxIUqv*7=W9DQUiBj$3V)v1QI~MVj_Za31tkDn+ zdY)?*P6R-0&UFK_cf0Xk!CVrq#u}nwGtC@u6mJh|s$LI*?39~&X)k~zkzcIA^`yV6 z2NA+FB%ak$2*r!=uMo>xKom3!r^VUx4mJYhsuy)@`FV)%oeMLRv@kvHuJXQHxVvL` zZTQ@q9wm|hR~jN9EFNMyi9ZKxWI?s!9_N8HfSw_|MqStGX%$nrj0v~HsjlqIFWilm z=hL@%FlL(sfZLrkQ`N`Jei?M1V!p>c2!IwkZtRW}PK5o9&~%isfk?NZl66>EA=4Xc z0q54sbM{lBB(Q4Zk-}U49!euSD_Q0x6ShQ}a|IavD(BLn9&?|;`-$=6C3mZMO|f!l zzKL9?nnWQK;Kd>34?H9jP2cRvY-(F0&7$7GsdDD{ae( zhsp~BWVQy4qAPE?x9#2tLeJEkIE8xpd>nq*j?EmxDrL41{QXAHNz~P70&t}4TG*qQ zo6Mz5AWnR9{BVvy+e!R)04UB6ROXvx6PHLdixr4#C?G!2?mog`z=&qSZZ#co^@uAx_cAscJ4qLRjn%j#ohsN9ee*KGg-;g{J z{m8EN^^>*iR=y;@E6e;4MyrGEXd9ooJvukh|f zr3}k}qwweTr;DE&_?oZ6p&U2<^k{o_KL5S3a&lP^X0c;>dN3sLEW$}^dzM5wD1BdO zuC~{?0Om=V`@`gRC6?d8?H=Oq5$_L5FcEAlXYk7IC;`|#lpU!BZV#_iwsH4hIMVJW zfIle+cn+JG*V<;;Dbe^%3S{2-i%(447ldkoA?hoYP~1H&9H~JQAUH$UZ;iimBOm$Py)|;El`9&LNsaFD`*{o`YxN8(CQM9f8JRO&PqP7G*mi>X8yi*~og^t5t zA9|x5zvn1WZ+Rnpxa-={HJ0TlxBT%p)(DXLbUOZKZJbnP>SBz(;OACZZpEZZ#~??| zVVv^WpfJN=gk|(FW~J)mO6KG+En0oTfyA)$X)el4uUC>*!T)?3%;tiV&|?pk*wG_v z84wQ+$FqD)omg=+NY7AB57g3wU>J4>W^XSeSLzKHY+8S&Xf^Ecyr-ebZp0MWoK(5&q&d) zSRS^Tjz0>28s&Qg--Ti6>u;&*+k#2BzxpzA|6cu4Vuekm`-ZNrUCRRmUIQ_BxKec| zu4f55OaQ{x;m<4hGCk6_QhK2U@)z!pIoJ(UZ>>0Fe5SgZ9n)uWGXQVG__E<0)%h=Z z4K?thsp5xLL4m=p7$nO=)u~cx_ub+!;R53`ZVfc{kuPXwNH|{|K4+Ak7M!k@-I$dc zP&glcQFwUsx{ieEVP%&3@we+QCZt^8GUdsWy~6|2i6Y5`{)Oh1`AHV;CEZH}qv{w5 zbf9*JWQc?%I>sya`)ssf(VSaLni|c$9FfC3{g|{HLG%(b%~5B0UbrQmLu+Pv1do%4 zfFu?jy3mQnF3PaJdXk#4{N?=jFFaynU;16>T3YW~bL?s|;!^V9AnogmxG1k_xICh9 
zH`iv$9`?BlwEwSDf9G|&pdjmqUPng|0PD*HU@rS=UOcq6_1zZ&FnkNovXeZNkrcF| zIJhU^>e|EQ{Uen8zL)PS(_e&5ENFvq=>!2#;RPefB+wOynjibG|C>!V&pS)8w zCLsW4a^=cFJaSYqkedK_Mx(8!Lp_s{es)*q=Wyk%FIE4JuDi{G?-+_bd*wZURtGnx zyQgmgqQ83r`87^}-(E7ewaD+(hbme&`XbahEqbEG{$sK=j6+egb=Ox;tdnX2@1^&*l|HHo0b6{V%F!&Epyr12a-v0Rf zFVp4v8+4Ur`j=g%Cli1#Tn+dSe0V>BDZO?uoL~T_&HN}B-7~p7qWVEb^cJ_$f~g@- zuk3m0Sc8}j*9?`q#RZz%7o=mA9dD42Gd(+gFTJg|^LM%_MlHs;x{}xOb*79x{15@C z9sn`;Vc#DZJO_fcfUK5cL1y6s9Ap;kL1ux)%A?(U#K@u*j;~I~E~3xjnMXhvejhX_ z+G{^ZY>-?)faC%P7KCa3#!z%Qrv5-KOEK?b;1&~f#-_&VVBu<5VI)Xf%z(578)%hv zy~C0^o34Y<_n8FXoZ{g>%7%_PpxN<(P>?3+p~H(*Eym$)LRR27(yn433wOFj0ob=z zf8_p)LeXZvKe&`ukV}#B2Z{FliaW{k3m;(D_3&=q#|Z$n6UcAK{y-Vz??D%7GrPYHuG=Au|7CwF z8@A-T7B_RL$C>a6?qs--h9C*r?wnvAU7|GG`OAgjhgpp|+)3T?-|j`BxhK`n4@n>? zqG~8~x?aSU7q|rR=eAz^f9F-xr$<22vjl-Bl>zxFogBo7Eo{Tyd(mpg!^w~U+$i4T zc8MtBU<5$1X9P_@f>k*Ia@$yp8Eo_d$Toq5Vq^EsN;!*BFE3Xu-Zeuv9@_NMrjmW7 ztAak}cj)z+NUH@2Ql$1ySV||5O9MGzyiYE2^&}DPGS|FYXV5qK7(I`}TNhXhDleMI7of7o31cq@*qf)(`7L*QHq*^4?H zo{7U{`{QBYS`6D5**hr)9VwM;7H>4X+_ZSln7WaMB$qbrd^}icm*L1UykP%;3Y8t6 z_B0zQS%s{7JW=Q_rOwj3L1@W6SSuR3rug>25bbSt6&MW!bfawWe#11Nb8=iE08x^# z>Une#==Ge)VPi7xU}}v`q4-?jRvsP5S+)I%iRK zogMml!LS0fx}B|3FeUi(2dO!QfxUn+EhY1pBmFOlw6+F{>=rRY*XcK(I z0FuJ$M{$!bTF4{52OYB_m1E}=cc#6TS;Jk=r_NZ`djh> zhA=uRz=41yb3dQ^7dy%DD}9}5_bai}EewD}D{wF*Ik4}SDUtmhgXi-%Uq)IzPISAD zU(jnC2P5)zx^F+orc?515=w^k<{QacX;0|T8xezKYJrq^13s-Kp2m6u}9de(!uxg*+e-8J6;MnFCYBM zL{)l9CZ>^p$aJBlAG5o$k%Yk;cLcToaB*iZIAyT+2$L^~v)gd0bOv!RstriWs#4-1Quv>$9 zq{FZ}`;|B54fG8stQ#Vg@2@@OEbaMp&WwX@aT6t2eP}iNvz(wu_C*Wp#WUa{ut`vb z6nj=mdwL-pTaVS-^7Kdcl*5+lMdA}=_>Su>O+H-C&N7{SXfd7aBR(CJ? 
z-LDO^L{c-m%m@^9eLt7|u5+6=^t$IUxK^jsxzl-hJWn#jiaS!;bFS<(8mLPX%UXw7 z7}Y{)8z&${=wRii=NbE>;0h@&GAcGNGWZ5~j95okUih-M%R|zQJ>v9AS9_m5STJ77 zm$XnmfBbtSJuz4C%9R{don#sax1$!5ShwP?845Wo;OzTtHDqXW;Z|xD&UH;rlni^d zvqI}_gQxHHHm6=~AFlEG<+1z6=f(7>#2VgQ75@MfzPcw%VsDCadp_1xCYP9ReXXa5 z0N6!}IO>XrIreir<)$*5YMkIZZa=eRf3fQUjJyU^CN()o_b)WGd4x|NvL4`#d^z>p z(tsnAGGmcW@Wb+FQlslh=FdfTvv$Eg`e0Bp^$1b4W1Otcm!Gv4YiRMN~UY@Mi2K3z{;^1(|@ zwptdTzbD~G=H@yDEh5jp<+*BA6LrSov(#3c)0X|f%~z}rw`ls*S+ZmbKIa=|Wxc-L z<#u*KML8uYGUvOF3RBoKqYPhggF?-q`0(|S1fbI}!P3g|kG#i&G5jf?!)o|-(N$#2 zi6xLW^4yx-gRTEB>SRc*Zflu9S0-Q~7fV427Au%UT5GQ~8_w1Lol^oZ;t#=A!`QKR ze!a6Zh9An|V?jj@0y_{IPZixSySO^=bCje~z`)Rh|#l{6b9M%by{PRub zowrokj!0YQXiPAOZrQsl=dFtPa^Tx2WIf2>{#5Y+DqA-Z ztHb31jUop=lPBg@gU^!1S&KX9C0@zU=Qwu-C`N9QT^vDeLNFoNo;00l!!!BnMyUdt z{WI0De~&ICjbyre@6UkgvHuV*p|T$SR_Xl4BfbACJo2^@T`)Q)G-Ia4;bBLe^nxqO zL79~S0y%y2$mwf`#Yt#(UzTjg|HeV7KfqUynx_3X{G_k5?Z^@DY&3{I$Mb6z;y6cJ zXiM@VxbyGjISroP7VwSi`2&tksK}gGOpa2&dUe@pS?xbSVT0vH*oR1*dmI5!Eiq38 z@g@8}1&VgR6(bNMiy90Se7$H4dJs@1AHc*5D{TC`<;#%o`UD^WPI0gYUha#JeXR5E z!0276slVjs*q{`zmlwmm|B2t?ImF+eU8Y>JI^VrFr)a7i?q6=>ODFvFaB?QAjOz{9 zPb&`n`6e|9q2QNWSN^O)bh|ev1UI&#`^#7*_v9{tzg;CXn6n0XqK`$_z5+F2ARK60 zZ&k1x`DxSI;5z#)sVh@r@cFe2m|Dtni_=A`PZWwRZDt$%%5Va|E|L88PD+^Z_c~G0 zPyv{JOK<&`I$>>!Gx%L_od#$0Y&jK|VaOT=EoIc^N0!>i1DLe-UXZ-^!&{PlZlJN- z>6C*AN;9OIgxc?G`>1z%;)z_Ja;II2K`@oB1A=@am!R~mIpi$lKA6N+0hx#PeV^J$ z7k;L?vrJpf`l>o+7Fi(99zzf?4XgXFOX`v-@7;2Z-!@`4Vley6uE8C_+Mgw^MK{Y& z?65}-ELiOgN%!a+?+qx19~ttJ_C>9G73fuXoRM_7iKN7#PJ$)tuti-_oh=kr_iAalR!GEvbhBMNj_d{pQk%Ox?WzntxyLuV?Wf*mDd_4Zv)ogw z<|*REN9(_|Ym)lsxYUlyJ3`AxPn>U%`s@vIv6r17Rzu#qwZr_CPnSVQV9YF@Nqhoh zC>*DDMOw|Y234Q#JeL3Bgg|9`s`NJBr=sDZLe5)9YXqr{uDMWOYa&-WKc~MKjx>N_ zqvP;IBby#gE~${`w!WeIA8nEFajyWw(r%#@k>w?a<FwB3b<jcNzrtV6@ z@vmQwZ|fIzTJ||EkCxbHs#H)*20gW<X~uuf(0-cMaK#+8@>G#}K?cu@ zL{wGuV$3iU?*C^4F zdVOOR!hRXC6JV!US102|0OG9Pg{7irlzQb=GG^u6vK@S~ZNC&+nhjK~J4M~1T0Bb| zDXZ&#SRCL!7de?o-?{Vv-Vdo`Hk5Ab>-#)bDSg&qY2pxv9q$Js12ur?yHRe3t%+1G 
zwjW2COLl0j!JcnBg#c(L1qobHuDr0N1;*;o=g=+5e0*&2{Mzmx0*3t}1v~zvkF5x< zJ%T%?U%G^UoYQgJ%fJ}y4Yhup)cKhv%>11p`b+cnL%BxvE8pWuQBT*87!&;jWiXw! zJcqg&%be|MCwd)$VOpx6)hHw%qH#=~Rz1ci|6tAnS`7=XqC`);twN5%=xfGl`5~t# z15#dIif1Of8B*QII>9OT!jJL`XP8&h!ZqK8qP7)KM)(WqDE@i}=2`Y?3)Vw}kxUy_ z0X8+vrm0oQ&d?xNu_WU-rM$3jsE>jgmA!|a$bPb3v~T-e7IPK>5KRo-JYR%pT_*si zG*o`UpI%m!)%b{?F9q5fe+0te;U`Krr)uz+HzqY~>ea@j38il{tULnofchuW4hLc5 z4yTK5_P=AL_0}g)Hg5{lhUSo ze!{G7$)3ry#M+W(tR9%Dv<8#R7?NSwY;<1ePY9Q1H6G89UPH%HbdH}*VT?ER8pSds zU-IdNKRlhe(dUoaNJ?)S$iok9ITwJ*Yq$gjS~ti~@Cj&Go%QX6tQk~+h~qnp85qs% zTKF*iY)bD}SV=%yCSPrSU~sZ!|Mwx-O?#D8WNG*km1n=Bl0lUCVdrnl4gT>j=Dlgw zQS)fi;4jk|d3qR`2Iz*^4}~o0QIG_6fb><#f*Ge^Fyq7tCjb%lUlB1oGd!>aU+ARk z1EbTpxC!{~u?jF_=649>NTov;M>xPeu?|dw(Svs0SZwxHmK(c(A^=uErLoV-Apa@*`C=p7!B>N zlyygudP=HVfC=Ccy2Hf6XZIq$?%!6vYdSsnt!FD3HHWyGaE@as^pmD|mxtr&iVOK8 z#%P!MbGfIQ#yz~VEgl7EMe*9Q*iy;Y-v+LoY5W$IyE-p?P8;QhOdGMIcFyU0U*0jn zpio(xNfg3C!um?FjhyMzSyu95XpuJ}_L#fBP{oBTk4N9l`Ufnu#)5l7gnL}I2YAR&${Iw>to3WYeI&g^ zWdE^Ok-6Wlitl5ugH(c<^2e%D4bAhCja>Sb&kucvcsm+Ax=l2}|4D}89W(6TH7Czy zCc2q_O;wdL;!@UU6z$PA_4z zH^9^?B_EjV?T;F$9Nvv-9zmxeA!{NgXo33X2Da!RT{Hg-tG*g*p)1W(q|XyxXAHzo zUx*KvyC>*drghMgV7JjT_@|;^!qg9j*G!#qxy8iiE`{FcEa+}GPd`xM!D;D0#K?!? zUD04RsJr)1O6kzSS*t{BDV9`N935wycRw>^MaOtUJ!jm@BM#5|&{vE#?8^j6lq_eW z#N*EMUJc0bI-!fXJh*oIX~f zB)sj>s3h2R4`)sZGo{CXSYieCXKWV-YQ0m(Y6Ibp0V! zJh-H03=LZ84#Z{{)K6lRDkPQ%5RNVBtV8iz4N?=xqfqO7`i0o^I$`S1{nK$t9@OJx zDel|-KHt{1*uJ{t_<2}U8YIOvQ42)rt>3h{WBLqJ+~L)4-?8q5GV|OEl~v1ATdItLTDj6ghi@9;}N5IJrw_aYmM^aEp`r z@n}CgZm4#{ykL}Sfg8<)>TF09LR;lyl~H_tVcy5{cxSetPr1s1hRvKwPR1k}2(2$) zdk4IfvY6p{Pdzt6;!9@_rNKkcHOk#0cWplQNG+M^BfQj)CmZLb+Psjxse6o^{SoQm z+RlV7UZ>}$&3aayJHc%4Fb;l;-aX!_$Qg;(g5eN1%O%U<;@G?BIj4yc5QY{(>gbM? 
zR0T6NJRhy}T^IC32ztQBL|F(IL6Rtz-E_cA&VA z+marmP!?;Zd&q2{(M>X#=TWwhI;VlTjdP3V3#&qz=Zkn!?BUe_+_b+d^ij$EOnc5| z!GR&O$IfLZznZvQe0#mxzg4j+VfFrKZxEi$W5A$Yk{WkT8!h{=#q)Zq<>2M$ewsy- zl&6D~eYX&|IRk8z<6cVmS-o4!iagTSr0G_YyfbjM$H&w@QGW21ldthe)fxd%@S5wG z!-ROZk2}Z>)Pd?%FkzWf;(UB6{b>cQbWv*mR~aFh@Zo2ct`ulLfk(~_6nnHpI?Em&PKdWxk%%Z@J{Z2*Ti1O+wfkP3aqRu?bl@_7t>Q}digVaU)EdRCYR zX5fef^xOvy8n{aqLWuh?7}&gsvvRTZH9aySIVG}%<%%9}Gx`Rqj<60B&?ENlcnTEM z|K_Spjf0`5WHy`~Oa!Pra2$QLV^38_KXk{Dgz;#TlJaM@mE)AVTpz&FVtuM|SA*-f zjGTpf5NyXcbn8dyo7&ED`Zrv-WOL!|`Qv&1X*D$VjA8?-&)H}MGPD7b_e`KCezLu8 zhoqKg_qw+vT=y-+M)KthZ_Ha|b%k%5t`7jCZ;;{!Y@|~la*GU1Mr`;-TdDQ_@tuhQ znsw$jWVuA)Fjo+N{u%7(55d|O6w8YTbQE#YtTRbPu&spAD)upVskb~~6d-%lGIbXWWGbOb) zh5Ve))k|*QIyS9x zSB#W-t=^M0t5Dr43poaB-ko>DSzOiyk(f`*kZAABq?aqxGy-;{_iE-6*^2fL+ zhj*2g?YX(r>-cZq>3H199p1fnfA-ZTG};p?<(|lhYMDl&j@HLAh7APMvI<%D7)Mq# ztS8wxOcqeQ5_@!ZGfCUIa`&jfksy~@WDDO;a%obP9ZDuhUI#^T5q&N%cR3#LN$F*=Fg5Vj_T)B#>KhV`&4fe_|M<{OE zY77>u&>zGPO7gzs{XUYYavz@qtAg*+|5LrZ_q0pozQfdjz)yi)mg(Lk_K&A5{K)ZA z!TR4502h#~A-VQ`y z&4W!z*?Wav{?B9GnRkfcduy3ioZMz-hT9-)9B*F}Rg8=AppVSd}=U6*QJ5-Rk38s|}{WvC}%<>Gjf=>XE zE~vJ&$$_;6j6*O4ARV@5aWP-At9)TQe8@yyy~K6q#5VS;D3P~3N~;D(r{1Qd zaxIhfDdcb~M_~r^91F&leIVR8b8?0v^`Q!QF!tZfKxf43gueI**^iCcyUE%AfsxM( zsr@wZjnF>Jt#jZB8LEZTe05YXcWN*`cx9L{NMgzQN6%cBAq7K|{Z9i?hsV9BL&R-R zS}nE@+Ka;`G&(;GTDxCWYNheuJ}XO-=2sjFWAwl)f3Gvy@8rNEq!$z? 
z^q9}mVZ4uZivlaeS=#nw448+`z3PD|f)DKl< zQP(@O;BnoF={F3)wshE{^R3MdmrM5C61?|BBmaIU_i{!*AUBm=MD{{Y|M>x2f7>s@ z>@DTqHn;C757Z*&s~J6-3IT;M*O3_1DIgAV^m8nUZ(+|6|yN$D$swyCZnUY&V@y`DE$#e{MU zPBSwewk@xmbZa^NlGD$FI`ZfRPR;qbFh7IR@^aDSJFp`aC&#@u3)(9TlI(hOQtL`y zCew7#%Ozyhj%rh={~hlH`I|3q8I9T`yn&T>ZJZ6_PpYU{e*~T_BKayAVr_*^&$rJ{ zv+qP(*O)3SmYZu`cOs2;?`0U(;Fg@@;%#KeIqsHup@+BRGQ@uvF%IfS1(R*@-uW)z=4t|_D6u_@R>t=l!zUA&}q z`F$+LDgJ`GlhD27FE8>xJQXZ?9s1@>ok96(UOq~~0;Tq>ziHx9VO&k`2}t|WCaIW~ z?CYrTu?hLwlxNl1W2-qPMlS~3g6!>m<&oHEt+q{B&M96&ep{P#*WT=smG2w2-uII( zq|QDJ9v*Ksi#qmTM?M|5-t;l~*g)hL)}2QU-k?AT9&!d9SMMv;k>}vJB=o>UZ*g5Q z!kuBQA|U*Ngq(xnpqX7lYBVYNyoX20aV?WoWVHf0gn9vGb*DcqUh4LT&Qn2x&{NW? ziq`xljHUP5qqJptZrZ#p@SC}S%CAF<-W5&KH056r%Bzddbr7<@$$<1`X*Q%uakx#< z#+I2%CS8J%sxmKy!N0uG4k)`>V(VSG;A<#XDKnAo=jySsvm|AkT7Y~IG@>Kt)6`cS zx@|2(E_S5K*W$%zfA18MI#)JW(1S`ni|%4CvwRudjwh;X5aGbc>C<=cH_r3{iHE>m zek(tf>O7&<7g{q+)*&yO4a&>rOU(xAqMn>;Ipkt2N0%d>xE z6~=WdFeIVaC+}k_Of?RZ?{yU2I z46)1i#(p*KZe6|mc7@89S&zD~T3=IDV}CswdhecZeNBGCzs0s=Pos~^Vm-MYmczc9sBSf5ACz^x7!Q+0Roxm=-Yq1dU7nhBUf!^Va*YIM_*dQEYo9j7{S(@QAF zlH)Ih{KD)JJ62Pcq694W`z108T zbK(r1U}UN=iH*hz@IJ5VnD_?!P%-9X*1u0ewPuC)YTOY;>r5iC+YxJ4tDyXe24^U3 zA%c35H>y0DP2|6pc$Uej5w^3!H;Ek6w%>PzU+H%x{6CA*FV-eA*jr)h&tJ5EKR$@E zA{u;9!J4lIo{4O+G2Nuq-T49PFZIK#!`(XJ9#-d#1mB>OE@Q<}&J?7!EN{5+A1VP) zn?UTLJa5lc-_UVQuR6KFzz066-6F+Ey4jwdE1V?{$WJBOj>aV3w#ZXvy!hqH4+(_m zBdE;~bZq5WgmDQ<@nc3)ScR|3q8pd^F>7dfiE1mBs$A5s0?dg*aF2wb8;Np@v&5pj zpuWEX9uD#y-uajtwhZ$~`l{pS6}mU>NLJd`m<(&!?Hrj)W*@tGLw5G+SqcuS-O{RS zxEwHqO~rzd?t9e|@bLr1O)H=3C2Fh1jWpvwz(>EVj-b!yz-GC@EIWS*9UiKN{9+vL{JHXadv3$1r>;H zHF$6$g8+;QV*M6pkYnUv#Pl{4)G8!(LD4wK>W}PCn;NKu(6NJHec!)B-9}C*zzm94 zi)>5T3?t5&4BEv;Rz#ojiUcZjl*82mu8fxXnmtM<%W4R$xPPDgV;0)0cKlnbUgweu zx(adXoLRQAvFBUaQ(UR{noJ&zKIKGx%ea<+bY)k73lw#L0{kj_gLlbKBC*KBPxBA- zAj3)V#N2ji#6V*P`U%gMb_7;mX2$P~hXg&0Ugd6SSvqe?LX91Bw$sBeQH3$Dc@`)5 z>1Sw+U4U=)uU6spT6Abc-3hMt+|A=>r6akXrD~ujWz?rA!c}zIwZl{V)Ho(m^;bVI z?(klfai6Wa5d^P05ztD973!#vefQngusr&iW@Wq}sSU<=4H~IR(ae$MKbr+U#_Syd 
zpp**5k$Qup8o&&GYMAR8p_do+%rvvz!t-?SAPwUf0k$I9HR+Gvm z&$My%aT#tN7J2tHu0bBkr)olJxE~;k|tg%NMI|m4UJBfeL0%mZ_*tcA9!K-il)>@vNiAJ-HYeO?nQE?v+0x~ zZ#{5OOlV7qw-UtX<=r1SU}T~DE!~2aDZT4Ga(y0(rUG+Rj^DfvwyU=dS>PLAdV&>& zt1x2G|;eP*msbT{ZOe`Av8g! z`tpQ$I%&c#Pq{>A^T>)-`p$`+pAIegr|Z&fx8m@cGJEz$73w`RWBCqNp^#T5 zXsYS&z?bA_%hW(evp%fV2&+y_+Ece~`yMU++r4ChPF^Nmzxm>a9|~2G4)C#Yl0@%R z^!8q%wigrt`-qI31S8$2nj$D=bKrqe4im zCgi}+;L+)yN?4nNjgjKpS9zH^vJSd)N)g-LHYX3tRLK4_@&kNA$&UY45qNX->I(idTQB})r{ElP zADUT*E*U+Dh91G6STrFY;x=uKIm(AE?R@A+A~icTvho0T)!bZTx#Pi4~*9CS* zQs6ru1?%U$K=m%+yYfybm}XhoQ(EOE>vBX%xKg1eeIZbOdg?X(dG2={y+z#?h7A}k zjf$d?DW4>kKwk-jOIW=7f{qcLtg1`pZEhOsRu^-#ZOSJxkLZBNmd~Lp{KstNJEWIbzfrcru8(V;`vhaDxHo*VGVrQ{ zZb!A(QL%xoRuSot9(qrA+Q$VhBWFgZE7?DvU;~|>%;#O{fGv0_%*IsBek&wSz9oo! zsqE}Gxtl#hcUIS&Vl=EYg>RQ4;iF-S?PgK^4|mdceQjDyH8+?chwz8zd&7mJ672Im z5i%@1s!{XxlO!kQ`5Q_2Uc~NRW=;MO!27(%I7R!*tV<3Zr=MJq4^s)7Wu3a^Hpr|r z>f$If72V)Y>rR{Ut)S132tXO(9^Hj*+})getz&fg#5a!U1-|XZ=FI|$XvwllC~5R8 zXbE;oMhJWZY>gEC96W!ng&*J8MNw^q_EBw}bAXJql{cxQl%O9224P=BwU&wU6I!7| z$#~u!&@B;wARJWRhv8qWQ?&K$p*;_Bvl+B|^`@cGdAm2a7!DpU>Vh9wg#UVt0$>>M z`w^FgcQ^SB$I-7p?4*(2=m$G2)OLRj)sYLctHCwHv1DL4_WUh%S14r6YnAX^Ql!6r~1Knp{Ou2*t=1EI(Jw5~)FY z5keIRB>|+@fIxsG&h~z5uIM*wX4aZD|K{YZopbg+Pu}v|&*@DE1o@#eGY)ITcO#6S zj;6m*Et@&M3g5`U^M@2V%kUEO59p1mD>zEVK!^_;w-2Zhu<* zu?vrmo%J}&$P;~8DlSHZj*yDK_&4~1Os>D+ZEoR@MDZxsxI9nZhnviN z0b2F^Qt0;Cv{8j*>n@)4GbJ`|%2TqzLT-G?&~#`6@{GE&IdK<{SZO0%Z7y_3Z+SlL zE>j_L>zZu&vwM;S8IrE?+Ts^)p5RH`m%xjJ)C-WzhomWjaJModyus|XK8PZQ1Jp70 zkK+gZjCVF+5$8Gt#JgB^*`ZwHPG+L=^BlDg2lhnA*6qefu@^z{Q{P5Q0SA>JcSryZ zOz$A#k$X~$nT%h#>2?&$(hm3OSO-)Oda%fF^29Dne${v&*LmvL4gupr4}e-D0_vHF z{_I5+(-zF8@ao7v?VR_maPJc1G3TV`PawG6PdC=mH zLj#{$Z@mc7wylMa3(cJxnA3Q^qUNOP6)mQvo5MyAI-}cpC9;ZBA$Y*tY+8fa0}LeK zYWDA_`7pzb{>UAC&7s}beciV|`SE^cW0gZ_ILB0QI6X_AyjP<>5A{LtI_!q`Dgn}( zDo+qbA@EUbLap|oEvN2z@86Sfa-NBN8P zk@z>nWt*e}El+%fJh{GN8}#QrsIuYgKId);Ljqr}y`O%L zd`pN_UM1aL%#HKUgZ^+l_57(*xD!-WW=2JZt!}mU+4zX9t2FD@je3pPvDlQ%7uVmH 
z91Yffq>4}Gc^s#tB23vO!i(EcJ#kplNsFI{wtnl=R!O{xHst6jcR)A#| z`$7q#d8dQX#nS_QpB&sRB~Lop^FQ3;+Z9A4^GjuUd{hMyUdQi zjtW#RA?J)u8(<~TC76I?fA09K?;y+Ro77ipTvxEqQ${gQodBpQx1&s8s><^=?5*W3 zvZ)q%Q7>SogYbDbzAp2ipiR78BuF;UOsw{=_qmTCirTJ$#RWSUP<;^ukJJ;At&nLw z5NPEmpus`C?ckYYo$wI&?2smgA17G8PE5FD{tgt{GZXNI3pl6Q9LjIr&V>0eb&ln4 zR4lc6_)LrA@RuIssHu=5OH};0M*HI@ar&wUZ$vc&BcL7~k~I5n4LE-+rU?>Ek?pO} zPvTnJj_@)blA7$-vHmQPYdXVZ$`(W&gr`1Is<52&1QcFCGKwps$lD<)o;qM!F~otG zJBDEgP&6hU=-+i=_2n>_ltUaywzA~Dn4ljzoEjz35_zZS54w*nA*gNV4L@tfLs8Wz znin4LsNsLW5d=Gb>l+i=`Y+-$mD_aRQEoNJ^^kMYCF z_g(g%H0gQ*FY!D%iY{47znwM7;rwvZ;_NLy?E##E!&8t9^+OJJ5Uu4-_J+tUMO+W= zOW{c1(e#Yb^Yu_*jIDonJM7Y+eAu!{XF+TOLCpTe`&V+S-3FD2y+wZV0c1vd6x44r z5xXnjs3NQ`=eD%0bX$qgib)5@Q@U{UE*5_1qpD)~6vyeFWuv~8PjcPwYHAsN@Bc6Y zGYV;9%2Vq0tTQr7T9w(#c9q%SH{Q*4{V|zvn~t8F4z7<*G(}IDy4PIPiCXc(;2~Qrob``TZmuLNM1TG${q;_MQEG;Ze97QVeZC(2Brr;_umB+P5?i z{b08%-WxO3hb#Q;I9gC$aDw$bB79hGRsD*PbA*BIj6a0n*YdO_iBk6*L&z+ z0#FaD4S1unODj@R`C;?oKv)u6{Qk7=Di15dG2*PraH^5Uoj+eayWnK9!IwO&JXdOE zcM)zDn4~Edxpp@aAJQpIM1_pLXkh4ir7~!oVR+qz8@gL)XXl=koiaRaq||b`b>D`l z2)I=b2K^jsblF<$OfgY!y9Ukr@sXW-nL$qHMo)8I2p{95c@SfnkbaEY>4|Pn4u6HP zE@UW@rO>|UonmF5O~0m{d}bzfnq|w~WNF=bUfnzd1BHH(TP8qNvbsJ&d@0eJrAz2_ zLVuSHdw({!Ce^Et=<_KbeF;*@TQK{Yjd=|MUm^LOkBwNJY?#e?R88Wz0gKY+tjWZ+ z+!$XdgOVVpaK(n~rtw)1>!;s#s%CQb9j>qWe}k`rgr+OCfeI_s-hT&wDr5rl`) zC%IN6UW!JkKCLm~=vjGXpoFP|am!`U=3rMR5O^{87IZJvDMC-HE@2P#u7h&?&El4= z#Tgeev#Ggae=U9HM8uVjwLdwO3p(lA1Pu0`oRBoJ-%FakOhs6ZYhOguJFA^7^zBfu z;x=2|OjAEQ$Sp)4kzdktx0uRKkaLE!sJb%a_)RH&*EJG|bPF8>l{)k$o`PJ^{xSQ% zSEH`fmF=jr@^VZIw;J(pyk@QidlEj$80`KRsUWSE)_CfRzm&Lb$gl0>Lb85!rtM>osc?ghxBq^4k15o&UTi>Xz)w!_$%K!gG_iJOe_17X&FSUi3@2HdQEpJVt zBi1M@YZbF|znoj8D;uSeYM&ItW%VjH`FZ4gs;qBe@f!#nvL}XM0!hKiLd3%A#X5#M z3-%ix$s;ME?Hhg)uEqZf{(A@FMslxsz)p=6F23lI%5Z>n3p zbtQo{Fb64D1;~;HFFo*}aYAeg@niWefx#{$8IrTdR{T!P zoTn&s5}6!WYL^R|_HUlXapDI`m#frd ze9GS&pdMcn?1_E<`74${Rwf06Js{RaxGR0y&>e% za7n*OL3-WjG%CgO4@E2k4V_TKwz*D&H>&D*t38}6lUB&C)_2Oy23&mW5x!$2l+J~p 
zFIAM%OGqG7Im?iEyMc*M+(4hXqy8RlilXh^g_@^4`Q_^p3%HSN{SI%bncYDFKkNoE zb1&Nb=72VuAX(DN8%;ZNd@Y<+=W+)jaR)sA>l6*-Nyv)LH>&abNDD|xOM$9`FnM;Y zY*S70$j$tLcYFLAz`>cIQ~Luuw3Ya=kMwVw8n&M;AM<|N+;xlkHVn~LfLr6AHT;M3 z|Ca;K-!?Xs5s(EVmmLmB=$YF^#a25B5IYojSHTu^sBZbnKY)q`3*xB>0<(=GQ!EG> z2bN6~L?L>86U3zV(qCM3ff;v#Dm?4vL`vE-_D6<#SodXPbfqL@et!($U^2wM!&ZP) zcMcD#v(pj`G{qc&n+3Z6-J|T6U$JDc8pgawsRZdq7ezrC^d|oGwyO9SV&ABK{3y_{ zWJ!_x8*}N(G1uYUF0cGVu}n$p&Ap6Kg9GSi1LVFI+}=V{5Ne`V<9%5IU+k2@X=_iH zdZfoYI7gqke72~c#|DSEaA4y)^!FJRb9jFGzHk`9siFRS{b@6u2vxPb`|OfZD-|Zf zul^idg^JKr{L#0f5W&LQdin6kEN_VXs!F3LU$mpNq`aMaC!QwV-0N;c zgPOL+u@{nue{*p>;wgk}q1ph{LfoL(y1+Qp&iyj}l;@7o*;~@dSH2`a477@}TC^~Q z29q9wKEM!O=73`xLl*1LSAH`e|8aWIqe`K2Senx%EatC?s`9GMHj)%G*k#%~2IRYw zpCTSbG~)xcpfuEXb$<;Z8TsgGc_0a_;)q^x0F$B*?(}}6x|;zks?eJjM3FAUM!D)4 zkRscEmvT`AF%L3j?S6?8Gy$ak*e$?M8Ep%(A)0kTRa66{(lpTIW>bEpM78h5O`cJpev>$Msdup@X;CiAaMo+_;)R!kkF$TdNqjhkKNh~(hLDT?F}`M4v*x}(Y!q>(z+O83w28TRtX{nb(D zRqWzivpzoOVlg6d#UVhE!VH@ArOrE*z0Ic=(@dmm)58Uym9Ag2Owwb}W$@jhDvnn; z(|sD(f{m}p^uE$dQc=&hW30@w^0th>YyFX~<85qboKa{>=ZwH3_m5<~ddqkyLuUSo z5kKWfiFx#~^p+_`#*Nq}Q13&e)$CO-rsK@eF@2IX0 z6O*=2ejjeyAYiW4I;pjq2g=Ad@ccItRle1kIMX*w9#m_5RmFJ=#Ys^iF@Cm}jf!`1 zaMzXSHFb8wGM7rqI;M)iRvrJc7+$Tg{?b>--Rf3(-x}j=VSK7(1+#j8&Su#?U32Tl z4KRNZ5dmr#=9d**pP8P=cdm!7udxWgqhP|dq}jqNT>kgVl^t#eL_wUnPD-HA zZsL9&Du^7es+DCye9|^-j7yNAJ$nCz)f=(M!0o)BIaxI{*|X zkgN|YAXKzyCLHr&XvOS8*qGB+?zGYO=ooTHJ_mOXHFSScy?>M9r`zU2AL^w%?2r6T z7c0K0TF%9@@4shMd`Ls(f9dZ6F!AtMZpw9G=GT8)tpab#xL%Jm`w4G6^u<1MDR?R& WpK-f;h}M7cjfal{zLELc=zjnQ6Cf-A literal 0 HcmV?d00001 diff --git a/src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/CMakeLists.txt b/src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/CMakeLists.txt index 0c48f81..e8a1d66 100644 --- a/src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/CMakeLists.txt +++ b/src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/CMakeLists.txt @@ -24,7 
+24,9 @@ set(src_list ${CMAKE_CURRENT_SOURCE_DIR}/coll_all_reduce_order_preserved_executor.cc ${CMAKE_CURRENT_SOURCE_DIR}/coll_all_reduce_aiv_deter_executor.cc ${CMAKE_CURRENT_SOURCE_DIR}/coll_all_reduce_aiv_deter_small_executor.cc - + ${CMAKE_CURRENT_SOURCE_DIR}/coll_custom_small_all_reduce_mesh_executor.cc + ${CMAKE_CURRENT_SOURCE_DIR}/coll_custom_medium_all_reduce_mesh_executor.cc + ${CMAKE_CURRENT_SOURCE_DIR}/coll_custom_huge_all_reduce_mesh_executor.cc ) target_sources(hccl_alg PRIVATE diff --git a/src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_huge_all_reduce_mesh_executor.cc b/src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_huge_all_reduce_mesh_executor.cc new file mode 100644 index 0000000..a7f0de8 --- /dev/null +++ b/src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_huge_all_reduce_mesh_executor.cc @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2025 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. 
+ */ + +#include "coll_custom_huge_all_reduce_mesh_executor.h" + +namespace hccl { +CollCustomHugeAllReduceMeshExecutor::CollCustomHugeAllReduceMeshExecutor(const HcclDispatcher dispatcher, + std::unique_ptr &topoMatcher) + : CollCommExecutor(dispatcher, topoMatcher) +{ +} + +HcclResult CollCustomHugeAllReduceMeshExecutor::CalcScratchMemSize(u64 &scratchMemSize) +{ + // 计算所需要申请的 Scratch 内存大小 + // TODO: 选手可根据算法需要自行修改 + scratchMemSize = 0U; + HCCL_WARNING("[HCCLContest][CollCustomHugeAllReduceMeshExecutor][CalcScratchMemSize] scratchMemSize: %u", + scratchMemSize); + return HCCL_SUCCESS; +} + +HcclResult CollCustomHugeAllReduceMeshExecutor::CalcStreamNum(u32 &streamNum) +{ + // 计算所需要申请的 Stream 数量 + // TODO: 选手可根据算法需要自行修改 + u32 totalStreamNum = topoAttr_.deviceNumPerAggregation; + streamNum = totalStreamNum - 1U; + HCCL_WARNING("[HCCLContest][CollCustomHugeAllReduceMeshExecutor][CalcStreamNum] streamNum: %u", streamNum); + return HCCL_SUCCESS; +} + +HcclResult CollCustomHugeAllReduceMeshExecutor::CalcNotifyNum(u32 streamNum, u32 ¬ifyNum) +{ + // 计算所需要申请的 Notify 数量 + // TODO: 选手可根据算法需要自行修改 + notifyNum = 2U * streamNum; + HCCL_WARNING("[HCCLContest][CollCustomHugeAllReduceMeshExecutor][CalcNotifyNum] notifyNum: %u", notifyNum); + return HCCL_SUCCESS; +} + +HcclResult CollCustomHugeAllReduceMeshExecutor::CalcCommInfo(std::vector &opTransport) +{ + // 计算通信域信息 + // TODO: 选手可根据算法需要自行修改 + HCCL_WARNING("[HCCLContest][CollCustomHugeAllReduceMeshExecutor][CalcNotifyNum]"); + + // CCL_Input -> CCL_Output + TransportMemType inputType = TransportMemType::CCL_INPUT; + TransportMemType outputType = TransportMemType::CCL_OUTPUT; + // 建立 Mesh 链路 + CommParaInfo commParaLevel0(COMM_LEVEL0, CommType::COMM_TAG_MESH); + // 构造一级通信域资源请求 + // 最终将调用:CalcMeshTransportReq::CalcTransportRequest() + CHK_RET(CalcCommPlaneInfo(tag_, commParaLevel0, opTransport[COMM_LEVEL0], inputType, outputType)); + return HCCL_SUCCESS; +} + +u64 CollCustomHugeAllReduceMeshExecutor::CalcLoopMaxCount(const u64 
cclBuffSize, const u32 unitSize) +{ + // 计算循环处理的迭代次数 + // TODO: 选手可根据算法需要自行修改 + + u64 maxCountPerLoop = cclBuffSize / unitSize; + HCCL_WARNING("[HCCLContest][CollCustomHugeAllReduceMeshExecutor][CalcLoopMaxCount] maxCountPerLoop: %u", + maxCountPerLoop); + return maxCountPerLoop; +} + +HcclResult CollCustomHugeAllReduceMeshExecutor::Orchestrate(OpParam ¶m, AlgResourceResponse &algRes) +{ + // 算法编排总入口 + // TODO: 选手可根据算法需要自行修改 + + HCCL_WARNING("[HCCLContest][CollCustomHugeAllReduceMeshExecutor][Orchestrate] count: %u", param.DataDes.count); + tag_ = param.tag; + algResResp_ = &algRes; + + // User_Input 和 User_Output 指针 + u8 *userInputPtr = static_cast(param.inputPtr); + u8 *userOutputPtr = static_cast(param.outputPtr); + CHK_PTR_NULL(userInputPtr); + CHK_PTR_NULL(userOutputPtr); + + u32 unitSize = SIZE_TABLE[param.DataDes.dataType]; + u64 maxCountPerLoop = CalcLoopMaxCount(algRes.cclInputMem.size(), unitSize); + + // 循环处理数据 + for (u64 countLeft = param.DataDes.count, curCount = 0, inputOffset = 0, outputOffset = 0; countLeft > 0;) { + curCount = (countLeft > maxCountPerLoop) ? 
maxCountPerLoop : countLeft; + u64 curSize = curCount * unitSize; // curSize 为三种数据量:512K/2M/64M + + // 构造本次循环所使用的内存信息 + ExecMem execMem; + execMem.count = curCount; // 本次循环处理的数据量 + execMem.inputPtr = userInputPtr + inputOffset; // 本次循环使用的 User_Input 内存指针 + execMem.outputPtr = userOutputPtr + outputOffset; // 本次循环使用的 User_Output 内存指针 + execMem.inputMem = algRes.cclInputMem; // 本端的 CCL_Input 内存 + execMem.outputMem = algRes.cclOutputMem; // 本端的 CCL_Output 内存 + execMem.scratchMem = algRes.scratchMem; // 本端的 Scratch 内存 + + // 处理本次循环 + CHK_RET(KernelRun(param, execMem)); + + // 更新偏移量 + countLeft -= curCount; + inputOffset = curSize; + outputOffset = curSize; + } + return HCCL_SUCCESS; +} + +HcclResult CollCustomHugeAllReduceMeshExecutor::KernelRun(const OpParam ¶m, ExecMem &execMem) +{ + // 处理单次循环的数据 + // TODO: 选手可根据算法需要自行修改 + + u32 unitSize = SIZE_TABLE[param.DataDes.dataType]; // 数据类型的字节数 + u64 curSize = execMem.count * unitSize; // 本次循环需要处理的数据大小,三种数据量:512K/2m/64m,单位:字节 + hccl::Stream &masterStream = const_cast(param.stream); // 主流 + + // TODO: 流同步 + + CHK_RET(CheckCommSize(COMM_LEVEL0, COMM_INDEX_0 + 1)); + SubCommInfo level0CommInfo = GetSubCommInfo(COMM_LEVEL0, COMM_INDEX_0); + HCCL_WARNING("[HCCLContest][CollCustomHugeAllReduceMeshExecutor][KernelRun] localRank: %u, localRankSize: %u", + level0CommInfo.localRank, level0CommInfo.localRankSize); + + // TODO: 搬运数据 + + return HCCL_SUCCESS; +} + +REGISTER_EXEC("CustomHugeAllReduceMeshExecutor", CustomHugeAllReduceMesh, CollCustomHugeAllReduceMeshExecutor); +} // namespace hccl diff --git a/src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_huge_all_reduce_mesh_executor.h b/src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_huge_all_reduce_mesh_executor.h new file mode 100644 index 0000000..707a5d6 --- /dev/null +++ 
b/src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_huge_all_reduce_mesh_executor.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2025 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +#ifndef COLL_CUSTOM_HUGE_ALLREDUCE_MESH_EXECUTOR_H +#define COLL_CUSTOM_HUGE_ALLREDUCE_MESH_EXECUTOR_H + +#include "coll_comm_executor.h" + +namespace hccl { +class CollCustomHugeAllReduceMeshExecutor : public CollCommExecutor { +public: + CollCustomHugeAllReduceMeshExecutor(const HcclDispatcher dispatcher, std::unique_ptr &topoMatcher); + ~CollCustomHugeAllReduceMeshExecutor() = default; + +private: + /* *************** 资源计算 *************** */ + HcclResult CalcScratchMemSize(u64 &scratchMemSize) override; + HcclResult CalcStreamNum(u32 &streamNum) override; + HcclResult CalcNotifyNum(u32 streamNum, u32 ¬ifyNum) override; + HcclResult CalcCommInfo(std::vector &opTransport) override; + u64 CalcLoopMaxCount(const u64 cclBuffSize, const u32 unitSize); + + /* *************** 算法编排 *************** */ + HcclResult Orchestrate(OpParam ¶m, AlgResourceResponse &algRes); + HcclResult KernelRun(const OpParam ¶m, ExecMem &execMem) override; +}; +} // namespace hccl + +#endif diff --git a/src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_medium_all_reduce_mesh_executor.cc 
b/src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_medium_all_reduce_mesh_executor.cc new file mode 100644 index 0000000..6647cfc --- /dev/null +++ b/src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_medium_all_reduce_mesh_executor.cc @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2025 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +#include "coll_custom_medium_all_reduce_mesh_executor.h" + +namespace hccl { +CollCustomMediumAllReduceMeshExecutor::CollCustomMediumAllReduceMeshExecutor(const HcclDispatcher dispatcher, + std::unique_ptr &topoMatcher) + : CollCommExecutor(dispatcher, topoMatcher) +{ +} + +HcclResult CollCustomMediumAllReduceMeshExecutor::CalcScratchMemSize(u64 &scratchMemSize) +{ + // 计算所需要申请的 Scratch 内存大小 + // TODO: 选手可根据算法需要自行修改 + scratchMemSize = 0U; + HCCL_WARNING("[HCCLContest][CollCustomMediumAllReduceMeshExecutor][CalcScratchMemSize] scratchMemSize: %u", + scratchMemSize); + return HCCL_SUCCESS; +} + +HcclResult CollCustomMediumAllReduceMeshExecutor::CalcStreamNum(u32 &streamNum) +{ + // 计算所需要申请的 Stream 数量 + // TODO: 选手可根据算法需要自行修改 + u32 totalStreamNum = topoAttr_.deviceNumPerAggregation; + streamNum = totalStreamNum - 1U; + HCCL_WARNING("[HCCLContest][CollCustomMediumAllReduceMeshExecutor][CalcStreamNum] streamNum: %u", streamNum); + return HCCL_SUCCESS; +} + +HcclResult CollCustomMediumAllReduceMeshExecutor::CalcNotifyNum(u32 
streamNum, u32 ¬ifyNum) +{ + // 计算所需要申请的 Notify 数量 + // TODO: 选手可根据算法需要自行修改 + notifyNum = 2U * streamNum; + HCCL_WARNING("[HCCLContest][CollCustomMediumAllReduceMeshExecutor][CalcNotifyNum] notifyNum: %u", notifyNum); + return HCCL_SUCCESS; +} + +HcclResult CollCustomMediumAllReduceMeshExecutor::CalcCommInfo(std::vector &opTransport) +{ + // 计算通信域信息 + // TODO: 选手可根据算法需要自行修改 + HCCL_WARNING("[HCCLContest][CollCustomMediumAllReduceMeshExecutor][CalcNotifyNum]"); + + // CCL_Input -> CCL_Output + TransportMemType inputType = TransportMemType::CCL_INPUT; + TransportMemType outputType = TransportMemType::CCL_OUTPUT; + // 建立 Mesh 链路 + CommParaInfo commParaLevel0(COMM_LEVEL0, CommType::COMM_TAG_MESH); + // 构造一级通信域资源请求 + // 最终将调用:CalcMeshTransportReq::CalcTransportRequest() + CHK_RET(CalcCommPlaneInfo(tag_, commParaLevel0, opTransport[COMM_LEVEL0], inputType, outputType)); + return HCCL_SUCCESS; +} + +u64 CollCustomMediumAllReduceMeshExecutor::CalcLoopMaxCount(const u64 cclBuffSize, const u32 unitSize) +{ + // 计算循环处理的迭代次数 + // TODO: 选手可根据算法需要自行修改 + + u64 maxCountPerLoop = cclBuffSize / unitSize; + HCCL_WARNING("[HCCLContest][CollCustomMediumAllReduceMeshExecutor][CalcLoopMaxCount] maxCountPerLoop: %u", + maxCountPerLoop); + return maxCountPerLoop; +} + +HcclResult CollCustomMediumAllReduceMeshExecutor::Orchestrate(OpParam ¶m, AlgResourceResponse &algRes) +{ + // 算法编排总入口 + // TODO: 选手可根据算法需要自行修改 + + HCCL_WARNING("[HCCLContest][CollCustomMediumAllReduceMeshExecutor][Orchestrate] count: %u", param.DataDes.count); + tag_ = param.tag; + algResResp_ = &algRes; + + // User_Input 和 User_Output 指针 + u8 *userInputPtr = static_cast(param.inputPtr); + u8 *userOutputPtr = static_cast(param.outputPtr); + CHK_PTR_NULL(userInputPtr); + CHK_PTR_NULL(userOutputPtr); + + u32 unitSize = SIZE_TABLE[param.DataDes.dataType]; + u64 maxCountPerLoop = CalcLoopMaxCount(algRes.cclInputMem.size(), unitSize); + + // 循环处理数据 + for (u64 countLeft = param.DataDes.count, curCount = 0, inputOffset = 0, 
outputOffset = 0; countLeft > 0;) { + curCount = (countLeft > maxCountPerLoop) ? maxCountPerLoop : countLeft; + u64 curSize = curCount * unitSize; // curSize 为三种数据量:512K/2M/64M + + // 构造本次循环所使用的内存信息 + ExecMem execMem; + execMem.count = curCount; // 本次循环处理的数据量 + execMem.inputPtr = userInputPtr + inputOffset; // 本次循环使用的 User_Input 内存指针 + execMem.outputPtr = userOutputPtr + outputOffset; // 本次循环使用的 User_Output 内存指针 + execMem.inputMem = algRes.cclInputMem; // 本端的 CCL_Input 内存 + execMem.outputMem = algRes.cclOutputMem; // 本端的 CCL_Output 内存 + execMem.scratchMem = algRes.scratchMem; // 本端的 Scratch 内存 + + // 处理本次循环 + CHK_RET(KernelRun(param, execMem)); + + // 更新偏移量 + countLeft -= curCount; + inputOffset = curSize; + outputOffset = curSize; + } + return HCCL_SUCCESS; +} + +HcclResult CollCustomMediumAllReduceMeshExecutor::KernelRun(const OpParam ¶m, ExecMem &execMem) +{ + // 处理单次循环的数据 + // TODO: 选手可根据算法需要自行修改 + + u32 unitSize = SIZE_TABLE[param.DataDes.dataType]; // 数据类型的字节数 + u64 curSize = execMem.count * unitSize; // 本次循环需要处理的数据大小,三种数据量:512K/2m/64m,单位:字节 + hccl::Stream &masterStream = const_cast(param.stream); // 主流 + + // TODO: 流同步 + + CHK_RET(CheckCommSize(COMM_LEVEL0, COMM_INDEX_0 + 1)); + SubCommInfo level0CommInfo = GetSubCommInfo(COMM_LEVEL0, COMM_INDEX_0); + HCCL_WARNING("[HCCLContest][CollCustomMediumAllReduceMeshExecutor][KernelRun] localRank: %u, localRankSize: %u", + level0CommInfo.localRank, level0CommInfo.localRankSize); + + // TODO: 搬运数据 + + return HCCL_SUCCESS; +} + +REGISTER_EXEC("CustomMediumAllReduceMeshExecutor", CustomMediumAllReduceMesh, CollCustomMediumAllReduceMeshExecutor); +} // namespace hccl diff --git a/src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_medium_all_reduce_mesh_executor.h b/src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_medium_all_reduce_mesh_executor.h new file mode 100644 index 0000000..cfa4d4b --- /dev/null +++ 
b/src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_medium_all_reduce_mesh_executor.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2025 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +#ifndef COLL_CUSTOM_MEDIUM_ALLREDUCE_MESH_EXECUTOR_H +#define COLL_CUSTOM_MEDIUM_ALLREDUCE_MESH_EXECUTOR_H + +#include "coll_comm_executor.h" + +namespace hccl { +class CollCustomMediumAllReduceMeshExecutor : public CollCommExecutor { +public: + CollCustomMediumAllReduceMeshExecutor(const HcclDispatcher dispatcher, std::unique_ptr &topoMatcher); + ~CollCustomMediumAllReduceMeshExecutor() = default; + +private: + /* *************** 资源计算 *************** */ + HcclResult CalcScratchMemSize(u64 &scratchMemSize) override; + HcclResult CalcStreamNum(u32 &streamNum) override; + HcclResult CalcNotifyNum(u32 streamNum, u32 ¬ifyNum) override; + HcclResult CalcCommInfo(std::vector &opTransport) override; + u64 CalcLoopMaxCount(const u64 cclBuffSize, const u32 unitSize); + + /* *************** 算法编排 *************** */ + HcclResult Orchestrate(OpParam ¶m, AlgResourceResponse &algRes); + HcclResult KernelRun(const OpParam ¶m, ExecMem &execMem) override; +}; +} // namespace hccl + +#endif diff --git a/src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_small_all_reduce_mesh_executor.cc 
b/src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_small_all_reduce_mesh_executor.cc new file mode 100644 index 0000000..7aebc74 --- /dev/null +++ b/src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_small_all_reduce_mesh_executor.cc @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2025 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +#include "coll_custom_small_all_reduce_mesh_executor.h" + +namespace hccl { +CollCustomSmallAllReduceMeshExecutor::CollCustomSmallAllReduceMeshExecutor(const HcclDispatcher dispatcher, + std::unique_ptr &topoMatcher) + : CollCommExecutor(dispatcher, topoMatcher) +{ +} + +HcclResult CollCustomSmallAllReduceMeshExecutor::CalcScratchMemSize(u64 &scratchMemSize) +{ + // 计算所需要申请的 Scratch 内存大小 + // TODO: 选手可根据算法需要自行修改 + scratchMemSize = 0U; + HCCL_WARNING("[HCCLContest][CollCustomSmallAllReduceMeshExecutor][CalcScratchMemSize] scratchMemSize: %u", + scratchMemSize); + return HCCL_SUCCESS; +} + +HcclResult CollCustomSmallAllReduceMeshExecutor::CalcStreamNum(u32 &streamNum) +{ + // 计算所需要申请的 Stream 数量 + // TODO: 选手可根据算法需要自行修改 + u32 totalStreamNum = topoAttr_.deviceNumPerAggregation; + streamNum = totalStreamNum - 1U; + HCCL_WARNING("[HCCLContest][CollCustomSmallAllReduceMeshExecutor][CalcStreamNum] streamNum: %u", streamNum); + return HCCL_SUCCESS; +} + +HcclResult CollCustomSmallAllReduceMeshExecutor::CalcNotifyNum(u32 streamNum, u32 
¬ifyNum) +{ + // 计算所需要申请的 Notify 数量 + // TODO: 选手可根据算法需要自行修改 + notifyNum = 2U * streamNum; + HCCL_WARNING("[HCCLContest][CollCustomSmallAllReduceMeshExecutor][CalcNotifyNum] notifyNum: %u", notifyNum); + return HCCL_SUCCESS; +} + +HcclResult CollCustomSmallAllReduceMeshExecutor::CalcCommInfo(std::vector &opTransport) +{ + // 计算通信域信息 + // TODO: 选手可根据算法需要自行修改 + HCCL_WARNING("[HCCLContest][CollCustomSmallAllReduceMeshExecutor][CalcNotifyNum]"); + + // CCL_Input -> CCL_Output + TransportMemType inputType = TransportMemType::CCL_INPUT; + TransportMemType outputType = TransportMemType::CCL_OUTPUT; + // 建立 Mesh 链路 + CommParaInfo commParaLevel0(COMM_LEVEL0, CommType::COMM_TAG_MESH); + // 构造一级通信域资源请求 + // 最终将调用:CalcMeshTransportReq::CalcTransportRequest() + CHK_RET(CalcCommPlaneInfo(tag_, commParaLevel0, opTransport[COMM_LEVEL0], inputType, outputType)); + return HCCL_SUCCESS; +} + +u64 CollCustomSmallAllReduceMeshExecutor::CalcLoopMaxCount(const u64 cclBuffSize, const u32 unitSize) +{ + // 计算循环处理的迭代次数 + // TODO: 选手可根据算法需要自行修改 + + u64 maxCountPerLoop = cclBuffSize / unitSize; + HCCL_WARNING("[HCCLContest][CollCustomSmallAllReduceMeshExecutor][CalcLoopMaxCount] maxCountPerLoop: %u", + maxCountPerLoop); + return maxCountPerLoop; +} + +HcclResult CollCustomSmallAllReduceMeshExecutor::Orchestrate(OpParam ¶m, AlgResourceResponse &algRes) +{ + // 算法编排总入口 + // TODO: 选手可根据算法需要自行修改 + + HCCL_WARNING("[HCCLContest][CollCustomSmallAllReduceMeshExecutor][Orchestrate] count: %u", param.DataDes.count); + tag_ = param.tag; + algResResp_ = &algRes; + + // User_Input 和 User_Output 指针 + u8 *userInputPtr = static_cast(param.inputPtr); + u8 *userOutputPtr = static_cast(param.outputPtr); + CHK_PTR_NULL(userInputPtr); + CHK_PTR_NULL(userOutputPtr); + + u32 unitSize = SIZE_TABLE[param.DataDes.dataType]; + u64 maxCountPerLoop = CalcLoopMaxCount(algRes.cclInputMem.size(), unitSize); + + // 循环处理数据 + for (u64 countLeft = param.DataDes.count, curCount = 0, inputOffset = 0, outputOffset = 0; countLeft > 
0;) { + curCount = (countLeft > maxCountPerLoop) ? maxCountPerLoop : countLeft; + u64 curSize = curCount * unitSize; // curSize 为三种数据量:512K/2M/64M + + // 构造本次循环所使用的内存信息 + ExecMem execMem; + execMem.count = curCount; // 本次循环处理的数据量 + execMem.inputPtr = userInputPtr + inputOffset; // 本次循环使用的 User_Input 内存指针 + execMem.outputPtr = userOutputPtr + outputOffset; // 本次循环使用的 User_Output 内存指针 + execMem.inputMem = algRes.cclInputMem; // 本端的 CCL_Input 内存 + execMem.outputMem = algRes.cclOutputMem; // 本端的 CCL_Output 内存 + execMem.scratchMem = algRes.scratchMem; // 本端的 Scratch 内存 + + // 处理本次循环 + CHK_RET(KernelRun(param, execMem)); + + // 更新偏移量 + countLeft -= curCount; + inputOffset = curSize; + outputOffset = curSize; + } + return HCCL_SUCCESS; +} + +HcclResult CollCustomSmallAllReduceMeshExecutor::KernelRun(const OpParam ¶m, ExecMem &execMem) +{ + // 处理单次循环的数据 + // TODO: 选手可根据算法需要自行修改 + + u32 unitSize = SIZE_TABLE[param.DataDes.dataType]; // 数据类型的字节数 + u64 curSize = execMem.count * unitSize; // 本次循环需要处理的数据大小,三种数据量:512K/2m/64m,单位:字节 + hccl::Stream &masterStream = const_cast(param.stream); // 主流 + + // TODO: 流同步 + + CHK_RET(CheckCommSize(COMM_LEVEL0, COMM_INDEX_0 + 1)); + SubCommInfo level0CommInfo = GetSubCommInfo(COMM_LEVEL0, COMM_INDEX_0); + HCCL_WARNING("[HCCLContest][CollCustomSmallAllReduceMeshExecutor][KernelRun] localRank: %u, localRankSize: %u", + level0CommInfo.localRank, level0CommInfo.localRankSize); + + // TODO: 搬运数据 + + return HCCL_SUCCESS; +} + +REGISTER_EXEC("CustomSmallAllReduceMeshExecutor", CustomSmallAllReduceMesh, CollCustomSmallAllReduceMeshExecutor); +} // namespace hccl diff --git a/src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_small_all_reduce_mesh_executor.h b/src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_small_all_reduce_mesh_executor.h new file mode 100644 index 0000000..f373a0e --- /dev/null +++ 
b/src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_small_all_reduce_mesh_executor.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2025 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +#ifndef COLL_CUSTOM_SMALL_ALLREDUCE_MESH_EXECUTOR_H +#define COLL_CUSTOM_SMALL_ALLREDUCE_MESH_EXECUTOR_H + +#include "coll_comm_executor.h" + +namespace hccl { +class CollCustomSmallAllReduceMeshExecutor : public CollCommExecutor { +public: + CollCustomSmallAllReduceMeshExecutor(const HcclDispatcher dispatcher, std::unique_ptr &topoMatcher); + ~CollCustomSmallAllReduceMeshExecutor() = default; + +private: + /* *************** 资源计算 *************** */ + HcclResult CalcScratchMemSize(u64 &scratchMemSize) override; + HcclResult CalcStreamNum(u32 &streamNum) override; + HcclResult CalcNotifyNum(u32 streamNum, u32 ¬ifyNum) override; + HcclResult CalcCommInfo(std::vector &opTransport) override; + u64 CalcLoopMaxCount(const u64 cclBuffSize, const u32 unitSize); + + /* *************** 算法编排 *************** */ + HcclResult Orchestrate(OpParam ¶m, AlgResourceResponse &algRes); + HcclResult KernelRun(const OpParam ¶m, ExecMem &execMem) override; +}; +} // namespace hccl + +#endif diff --git a/src/domain/collective_communication/algorithm/impl/operator/CMakeLists.txt b/src/domain/collective_communication/algorithm/impl/operator/CMakeLists.txt index ccf812f..b1726cd 100644 --- 
a/src/domain/collective_communication/algorithm/impl/operator/CMakeLists.txt +++ b/src/domain/collective_communication/algorithm/impl/operator/CMakeLists.txt @@ -13,6 +13,7 @@ set(src_list ${CMAKE_CURRENT_SOURCE_DIR}/send_operator.cc ${CMAKE_CURRENT_SOURCE_DIR}/receive_operator.cc ${CMAKE_CURRENT_SOURCE_DIR}/batch_write_operator.cc + ${CMAKE_CURRENT_SOURCE_DIR}/custom_all_reduce_operator.cc ) target_sources(hccl_alg PRIVATE diff --git a/src/domain/collective_communication/algorithm/impl/operator/all_reduce_operator.cc b/src/domain/collective_communication/algorithm/impl/operator/all_reduce_operator.cc index 626018d..76bfd01 100644 --- a/src/domain/collective_communication/algorithm/impl/operator/all_reduce_operator.cc +++ b/src/domain/collective_communication/algorithm/impl/operator/all_reduce_operator.cc @@ -599,6 +599,6 @@ HcclResult AllReduceOperator::SelectAlgfor91093(const OpParam& param, std::strin return HCCL_SUCCESS; } -REGISTER_OP(HcclCMDType::HCCL_CMD_ALLREDUCE, AllReduce, AllReduceOperator); +// REGISTER_OP(HcclCMDType::HCCL_CMD_ALLREDUCE, AllReduce, AllReduceOperator); } diff --git a/src/domain/collective_communication/algorithm/impl/operator/custom_all_reduce_operator.cc b/src/domain/collective_communication/algorithm/impl/operator/custom_all_reduce_operator.cc new file mode 100644 index 0000000..cd73e75 --- /dev/null +++ b/src/domain/collective_communication/algorithm/impl/operator/custom_all_reduce_operator.cc @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2025 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. 
+ * See LICENSE in the root of the software repository for the full text of the License. + */ + +#include "custom_all_reduce_operator.h" +#include "coll_alg_op_registry.h" + +namespace hccl { + +CustomAllReduceOperator::CustomAllReduceOperator(AlgConfigurator *algConfigurator, CCLBufferManager &cclBufferManager, + HcclDispatcher dispatcher, std::unique_ptr &topoMatcher) + : CollAlgOperator(algConfigurator, cclBufferManager, dispatcher, topoMatcher, HcclCMDType::HCCL_CMD_ALLREDUCE) +{ +} + +CustomAllReduceOperator::~CustomAllReduceOperator() {} + +HcclResult CustomAllReduceOperator::SelectAlg(const std::string &tag, const OpParam ¶m, std::string &algName, + std::string &newTag) +{ + constexpr u64 HCCL_CONTEST_SMALL_COUNT_KB = 512 * 1024; // 512KB + constexpr u64 HCCL_CONTEST_MEDIUM_COUNT_KB = 2 * 1024 * 1024; // 2MB + constexpr u64 HCCL_CONTEST_HUGE_COUNT_KB = 64 * 1024 * 1024; // 64MB + + // 算法选择逻辑 + // TODO: 选手可根据数据量大小选择合适的 Executor + // 注意: + // 1. 相同算法在不同数据量下的性能不同 + // 2. 选手可以先只实现一个 Executor,算法选择时直接设置 algName 为该 Executor 的名字 + + u32 unitSize = SIZE_TABLE[param.DataDes.dataType]; + u64 dataSize = param.DataDes.count * unitSize; // 单位:字节,三种数据量:512K/2M/64M + if (dataSize <= HCCL_CONTEST_SMALL_COUNT_KB) { + algName = "CustomSmallAllReduceMeshExecutor"; + } else if (dataSize <= HCCL_CONTEST_MEDIUM_COUNT_KB) { + algName = "CustomMediumAllReduceMeshExecutor"; + } else { + algName = "CustomHugeAllReduceMeshExecutor"; + } + return HCCL_SUCCESS; +} + +// 注册算子 +REGISTER_OP(HcclCMDType::HCCL_CMD_ALLREDUCE, AllReduce, CustomAllReduceOperator); +} // namespace hccl diff --git a/src/domain/collective_communication/algorithm/impl/operator/custom_all_reduce_operator.h b/src/domain/collective_communication/algorithm/impl/operator/custom_all_reduce_operator.h new file mode 100644 index 0000000..41ae73d --- /dev/null +++ b/src/domain/collective_communication/algorithm/impl/operator/custom_all_reduce_operator.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2025 Huawei Technologies Co., 
Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +#ifndef CUSTOM_ALL_REDUCE_OPERATOR_H +#define CUSTOM_ALL_REDUCE_OPERATOR_H + +#include "coll_alg_operator.h" + +namespace hccl { +// 数据规模分类 +enum class HcclDataCountType { HCCL_COUNT_SMALL = 0, HCCL_COUNT_MEDIUM, HCCL_COUNT_HUGE, HCCL_COUNT_RESERVED }; + +class CustomAllReduceOperator : public CollAlgOperator { +public: + CustomAllReduceOperator(AlgConfigurator *algConfigurator, CCLBufferManager &cclBufferManager, + HcclDispatcher dispatcher, std::unique_ptr &topoMatcher); + + ~CustomAllReduceOperator(); + + HcclResult SelectAlg(const std::string &tag, const OpParam ¶m, std::string &algName, + std::string &newTag) override; +}; +} // namespace hccl +#endif diff --git a/submit.sh b/submit.sh new file mode 100755 index 0000000..655e94b --- /dev/null +++ b/submit.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +set -e + +# bash fonts colors +red='\e[31m' +yellow='\e[33m' +green='\e[92m' +none='\e[0m' + +error() { echo -e "${red}$*${none}" && exit 1; } +warning() { echo -e "${yellow}$*${none}"; } +info() { echo -e "${green}$*${none}"; } + +src_dir="/workspace/cann-hccl" +dst_dir="/result" + +operator_dir="src/domain/collective_communication/algorithm/impl/operator" +executor_dir="src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce" + +files=( + "${operator_dir}/custom_all_reduce_operator.h" + "${operator_dir}/custom_all_reduce_operator.cc" + 
"${executor_dir}/coll_custom_small_all_reduce_mesh_executor.h" + "${executor_dir}/coll_custom_small_all_reduce_mesh_executor.cc" + "${executor_dir}/coll_custom_medium_all_reduce_mesh_executor.h" + "${executor_dir}/coll_custom_medium_all_reduce_mesh_executor.cc" + "${executor_dir}/coll_custom_huge_all_reduce_mesh_executor.h" + "${executor_dir}/coll_custom_huge_all_reduce_mesh_executor.cc" +) + +for file in "${files[@]}"; do + file_path="${src_dir}/${file}" + if [ -f "${file_path}" ]; then + cp -i "${file_path}" "${dst_dir}" + info "Copied: ${file_path} to ${dst_dir}" + else + error "No such file: ${file_path}" + fi +done + +info "All files copied successfully to ${dst_dir}" diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index bbd859a..8475b05 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -418,6 +418,7 @@ set(src_list_alg ${HCCL_OPEN_CODE_ALGORITHM}/impl/operator/registry/coll_alg_op_registry.cc ${HCCL_OPEN_CODE_ALGORITHM}/impl/operator/coll_alg_operator.cc ${HCCL_OPEN_CODE_ALGORITHM}/impl/operator/all_reduce_operator.cc + ${HCCL_OPEN_CODE_ALGORITHM}/impl/operator/custom_all_reduce_operator.cc ${HCCL_OPEN_CODE_ALGORITHM}/impl/operator/all_gather_operator.cc ${HCCL_OPEN_CODE_ALGORITHM}/impl/operator/all_gather_v_operator.cc ${HCCL_OPEN_CODE_ALGORITHM}/impl/operator/reduce_scatter_operator.cc @@ -477,8 +478,7 @@ target_compile_options(hccl_alg_test PRIVATE -fno-strict-aliasing -pipe -std=c++14 - -Os - -O2 + -O0 -g -fstack-protector-all $<$:-fsanitize=address -fsanitize-recover=address,all -fno-omit-frame-pointer -g> ) @@ -521,9 +521,10 @@ add_custom_target(hccl_alg_test_lib COMMAND cd ${CMAKE_INSTALL_PREFIX}/hccl_lib ) -add_custom_command(TARGET hccl_alg_test POST_BUILD - COMMAND ${CMAKE_STRIP} $ -) +# 禁用 strip +# add_custom_command(TARGET hccl_alg_test POST_BUILD +# COMMAND ${CMAKE_STRIP} $ +# ) install(TARGETS hccl_alg_test LIBRARY DESTINATION lib OPTIONAL diff --git a/test/algorithm/testcase/main.cc b/test/algorithm/testcase/main.cc index 
db03489..74d7e92 100644 --- a/test/algorithm/testcase/main.cc +++ b/test/algorithm/testcase/main.cc @@ -2,7 +2,7 @@ GTEST_API_ int main(int argc, char **argv) { // testcase调试代码,只跑特定的用例 - //testing::GTEST_FLAG(filter) = "AllReduceTest.allreduce_cyw_test"; + testing::GTEST_FLAG(filter) = "AllReduceTest.allreduce_contest_test*"; testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } diff --git a/test/algorithm/testcase/testcase_all_reduce.cc b/test/algorithm/testcase/testcase_all_reduce.cc index 7dc31ad..9f87d77 100644 --- a/test/algorithm/testcase/testcase_all_reduce.cc +++ b/test/algorithm/testcase/testcase_all_reduce.cc @@ -1751,4 +1751,394 @@ TEST_F(AllReduceTest, allreduce_aiv_determinstic_test) ret = checker.Check(checkerOpParam, topoMeta); // EXPECT_EQ(ret, HcclResult::HCCL_SUCCESS); -} \ No newline at end of file +} + +TEST_F(AllReduceTest, allreduce_contest_test_910b_512k_int8) +{ + // 拓扑:单机 8 卡 + RankTable_For_LLT gen; + TopoMeta topoMeta; + gen.GenTopoMeta(topoMeta, 1, 1, 8); + + // 512k + int8 + u64 size = 512 * 1024; + auto dataType = CheckerDataType::DATA_TYPE_INT8; + CheckerOpParam checkerOpParam; + checkerOpParam.opType = CheckerOpType::ALLREDUCE; + checkerOpParam.tag = "AllReduce"; + checkerOpParam.opMode = CheckerOpMode::OPBASE; + checkerOpParam.DataDes.count = size / SIZE_TABLE[dataType]; + checkerOpParam.DataDes.dataType = dataType; + checkerOpParam.devtype = CheckerDevType::DEV_TYPE_910B; + + Checker checker; + HcclResult ret; + checker.CloseRankMemCheck(); + checker.EnableTaskPrint(); + ret = checker.Check(checkerOpParam, topoMeta); + EXPECT_EQ(ret, HcclResult::HCCL_SUCCESS); +} + +TEST_F(AllReduceTest, allreduce_contest_test_910b_2m_int8) +{ + // 拓扑:单机 8 卡 + RankTable_For_LLT gen; + TopoMeta topoMeta; + gen.GenTopoMeta(topoMeta, 1, 1, 8); + + // 2m + int8 + u64 size = 2 * 1024 * 1024; + auto dataType = CheckerDataType::DATA_TYPE_INT8; + CheckerOpParam checkerOpParam; + checkerOpParam.opType = CheckerOpType::ALLREDUCE; + 
checkerOpParam.tag = "AllReduce"; + checkerOpParam.opMode = CheckerOpMode::OPBASE; + checkerOpParam.DataDes.count = size / SIZE_TABLE[dataType]; + checkerOpParam.DataDes.dataType = dataType; + checkerOpParam.devtype = CheckerDevType::DEV_TYPE_910B; + + Checker checker; + HcclResult ret; + checker.CloseRankMemCheck(); + checker.EnableTaskPrint(); + ret = checker.Check(checkerOpParam, topoMeta); + EXPECT_EQ(ret, HcclResult::HCCL_SUCCESS); +} + +TEST_F(AllReduceTest, allreduce_contest_test_910b_64m_int8) +{ + // 拓扑:单机 8 卡 + RankTable_For_LLT gen; + TopoMeta topoMeta; + gen.GenTopoMeta(topoMeta, 1, 1, 8); + + // 64m + int8 + u64 size = 64 * 1024 * 1024; + auto dataType = CheckerDataType::DATA_TYPE_INT8; + CheckerOpParam checkerOpParam; + checkerOpParam.opType = CheckerOpType::ALLREDUCE; + checkerOpParam.tag = "AllReduce"; + checkerOpParam.opMode = CheckerOpMode::OPBASE; + checkerOpParam.DataDes.count = size / SIZE_TABLE[dataType]; + checkerOpParam.DataDes.dataType = dataType; + checkerOpParam.devtype = CheckerDevType::DEV_TYPE_910B; + + Checker checker; + HcclResult ret; + checker.CloseRankMemCheck(); + checker.EnableTaskPrint(); + ret = checker.Check(checkerOpParam, topoMeta); + EXPECT_EQ(ret, HcclResult::HCCL_SUCCESS); +} + +TEST_F(AllReduceTest, allreduce_contest_test_910b_1g_int8) +{ + // 拓扑:单机 8 卡 + RankTable_For_LLT gen; + TopoMeta topoMeta; + gen.GenTopoMeta(topoMeta, 1, 1, 8); + + // 1g + int8 + u64 size = 1 * 1024 * 1024 * 1024; + auto dataType = CheckerDataType::DATA_TYPE_INT8; + CheckerOpParam checkerOpParam; + checkerOpParam.opType = CheckerOpType::ALLREDUCE; + checkerOpParam.tag = "AllReduce"; + checkerOpParam.opMode = CheckerOpMode::OPBASE; + checkerOpParam.DataDes.count = size / SIZE_TABLE[dataType]; + checkerOpParam.DataDes.dataType = dataType; + checkerOpParam.devtype = CheckerDevType::DEV_TYPE_910B; + + Checker checker; + HcclResult ret; + checker.CloseRankMemCheck(); + checker.EnableTaskPrint(); + ret = checker.Check(checkerOpParam, topoMeta); + 
EXPECT_EQ(ret, HcclResult::HCCL_SUCCESS); +} + +TEST_F(AllReduceTest, allreduce_contest_test_910b_4g_int8) +{ + // 拓扑:单机 8 卡 + RankTable_For_LLT gen; + TopoMeta topoMeta; + gen.GenTopoMeta(topoMeta, 1, 1, 8); + + // 4g + int8 + u64 size = 4 * 1024 * 1024 * 1024; + auto dataType = CheckerDataType::DATA_TYPE_INT8; + CheckerOpParam checkerOpParam; + checkerOpParam.opType = CheckerOpType::ALLREDUCE; + checkerOpParam.tag = "AllReduce"; + checkerOpParam.opMode = CheckerOpMode::OPBASE; + checkerOpParam.DataDes.count = size / SIZE_TABLE[dataType]; + checkerOpParam.DataDes.dataType = dataType; + checkerOpParam.devtype = CheckerDevType::DEV_TYPE_910B; + + Checker checker; + HcclResult ret; + checker.CloseRankMemCheck(); + checker.EnableTaskPrint(); + ret = checker.Check(checkerOpParam, topoMeta); + EXPECT_EQ(ret, HcclResult::HCCL_SUCCESS); +} + +TEST_F(AllReduceTest, allreduce_contest_test_910b_512k_fp16) +{ + // 拓扑:单机 8 卡 + RankTable_For_LLT gen; + TopoMeta topoMeta; + gen.GenTopoMeta(topoMeta, 1, 1, 8); + + // 512k + fp16 + u64 size = 512 * 1024; + auto dataType = CheckerDataType::DATA_TYPE_FP16; + CheckerOpParam checkerOpParam; + checkerOpParam.opType = CheckerOpType::ALLREDUCE; + checkerOpParam.tag = "AllReduce"; + checkerOpParam.opMode = CheckerOpMode::OPBASE; + checkerOpParam.DataDes.count = size / SIZE_TABLE[dataType]; + checkerOpParam.DataDes.dataType = dataType; + checkerOpParam.devtype = CheckerDevType::DEV_TYPE_910B; + + Checker checker; + HcclResult ret; + checker.CloseRankMemCheck(); + checker.EnableTaskPrint(); + ret = checker.Check(checkerOpParam, topoMeta); + EXPECT_EQ(ret, HcclResult::HCCL_SUCCESS); +} + +TEST_F(AllReduceTest, allreduce_contest_test_910b_2m_fp16) +{ + // 拓扑:单机 8 卡 + RankTable_For_LLT gen; + TopoMeta topoMeta; + gen.GenTopoMeta(topoMeta, 1, 1, 8); + + // 2m + fp16 + u64 size = 2 * 1024 * 1024; + auto dataType = CheckerDataType::DATA_TYPE_FP16; + CheckerOpParam checkerOpParam; + checkerOpParam.opType = CheckerOpType::ALLREDUCE; + 
checkerOpParam.tag = "AllReduce"; + checkerOpParam.opMode = CheckerOpMode::OPBASE; + checkerOpParam.DataDes.count = size / SIZE_TABLE[dataType]; + checkerOpParam.DataDes.dataType = dataType; + checkerOpParam.devtype = CheckerDevType::DEV_TYPE_910B; + + Checker checker; + HcclResult ret; + checker.CloseRankMemCheck(); + checker.EnableTaskPrint(); + ret = checker.Check(checkerOpParam, topoMeta); + EXPECT_EQ(ret, HcclResult::HCCL_SUCCESS); +} + +TEST_F(AllReduceTest, allreduce_contest_test_910b_64m_fp16) +{ + // 拓扑:单机 8 卡 + RankTable_For_LLT gen; + TopoMeta topoMeta; + gen.GenTopoMeta(topoMeta, 1, 1, 8); + + // 64m + fp16 + u64 size = 64 * 1024 * 1024; + auto dataType = CheckerDataType::DATA_TYPE_FP16; + CheckerOpParam checkerOpParam; + checkerOpParam.opType = CheckerOpType::ALLREDUCE; + checkerOpParam.tag = "AllReduce"; + checkerOpParam.opMode = CheckerOpMode::OPBASE; + checkerOpParam.DataDes.count = size / SIZE_TABLE[dataType]; + checkerOpParam.DataDes.dataType = dataType; + checkerOpParam.devtype = CheckerDevType::DEV_TYPE_910B; + + Checker checker; + HcclResult ret; + checker.CloseRankMemCheck(); + checker.EnableTaskPrint(); + ret = checker.Check(checkerOpParam, topoMeta); + EXPECT_EQ(ret, HcclResult::HCCL_SUCCESS); +} + +TEST_F(AllReduceTest, allreduce_contest_test_910b_1g_fp16) +{ + // 拓扑:单机 8 卡 + RankTable_For_LLT gen; + TopoMeta topoMeta; + gen.GenTopoMeta(topoMeta, 1, 1, 8); + + // 1g + fp16 + u64 size = 1 * 1024 * 1024 * 1024; + auto dataType = CheckerDataType::DATA_TYPE_FP16; + CheckerOpParam checkerOpParam; + checkerOpParam.opType = CheckerOpType::ALLREDUCE; + checkerOpParam.tag = "AllReduce"; + checkerOpParam.opMode = CheckerOpMode::OPBASE; + checkerOpParam.DataDes.count = size / SIZE_TABLE[dataType]; + checkerOpParam.DataDes.dataType = dataType; + checkerOpParam.devtype = CheckerDevType::DEV_TYPE_910B; + + Checker checker; + HcclResult ret; + checker.CloseRankMemCheck(); + checker.EnableTaskPrint(); + ret = checker.Check(checkerOpParam, topoMeta); + 
EXPECT_EQ(ret, HcclResult::HCCL_SUCCESS); +} + +TEST_F(AllReduceTest, allreduce_contest_test_910b_4g_fp16) +{ + // 拓扑:单机 8 卡 + RankTable_For_LLT gen; + TopoMeta topoMeta; + gen.GenTopoMeta(topoMeta, 1, 1, 8); + + // 4g + fp16 + u64 size = 4 * 1024 * 1024 * 1024; + auto dataType = CheckerDataType::DATA_TYPE_FP16; + CheckerOpParam checkerOpParam; + checkerOpParam.opType = CheckerOpType::ALLREDUCE; + checkerOpParam.tag = "AllReduce"; + checkerOpParam.opMode = CheckerOpMode::OPBASE; + checkerOpParam.DataDes.count = size / SIZE_TABLE[dataType]; + checkerOpParam.DataDes.dataType = dataType; + checkerOpParam.devtype = CheckerDevType::DEV_TYPE_910B; + + Checker checker; + HcclResult ret; + checker.CloseRankMemCheck(); + checker.EnableTaskPrint(); + ret = checker.Check(checkerOpParam, topoMeta); + EXPECT_EQ(ret, HcclResult::HCCL_SUCCESS); +} + +TEST_F(AllReduceTest, allreduce_contest_test_910b_512k_fp32) +{ + // 拓扑:单机 8 卡 + RankTable_For_LLT gen; + TopoMeta topoMeta; + gen.GenTopoMeta(topoMeta, 1, 1, 8); + + // 512k + fp32 + u64 size = 512 * 1024; + auto dataType = CheckerDataType::DATA_TYPE_FP32; + CheckerOpParam checkerOpParam; + checkerOpParam.opType = CheckerOpType::ALLREDUCE; + checkerOpParam.tag = "AllReduce"; + checkerOpParam.opMode = CheckerOpMode::OPBASE; + checkerOpParam.DataDes.count = size / SIZE_TABLE[dataType]; + checkerOpParam.DataDes.dataType = dataType; + checkerOpParam.devtype = CheckerDevType::DEV_TYPE_910B; + + Checker checker; + HcclResult ret; + checker.CloseRankMemCheck(); + checker.EnableTaskPrint(); + ret = checker.Check(checkerOpParam, topoMeta); + EXPECT_EQ(ret, HcclResult::HCCL_SUCCESS); +} + +TEST_F(AllReduceTest, allreduce_contest_test_910b_2m_fp32) +{ + // 拓扑:单机 8 卡 + RankTable_For_LLT gen; + TopoMeta topoMeta; + gen.GenTopoMeta(topoMeta, 1, 1, 8); + + // 2m + fp32 + u64 size = 2 * 1024 * 1024; + auto dataType = CheckerDataType::DATA_TYPE_FP32; + CheckerOpParam checkerOpParam; + checkerOpParam.opType = CheckerOpType::ALLREDUCE; + 
checkerOpParam.tag = "AllReduce"; + checkerOpParam.opMode = CheckerOpMode::OPBASE; + checkerOpParam.DataDes.count = size / SIZE_TABLE[dataType]; + checkerOpParam.DataDes.dataType = dataType; + checkerOpParam.devtype = CheckerDevType::DEV_TYPE_910B; + + Checker checker; + HcclResult ret; + checker.CloseRankMemCheck(); + checker.EnableTaskPrint(); + ret = checker.Check(checkerOpParam, topoMeta); + EXPECT_EQ(ret, HcclResult::HCCL_SUCCESS); +} + +TEST_F(AllReduceTest, allreduce_contest_test_910b_64m_fp32) +{ + // 拓扑:单机 8 卡 + RankTable_For_LLT gen; + TopoMeta topoMeta; + gen.GenTopoMeta(topoMeta, 1, 1, 8); + + // 64m + fp32 + u64 size = 64 * 1024 * 1024; + auto dataType = CheckerDataType::DATA_TYPE_FP32; + CheckerOpParam checkerOpParam; + checkerOpParam.opType = CheckerOpType::ALLREDUCE; + checkerOpParam.tag = "AllReduce"; + checkerOpParam.opMode = CheckerOpMode::OPBASE; + checkerOpParam.DataDes.count = size / SIZE_TABLE[dataType]; + checkerOpParam.DataDes.dataType = dataType; + checkerOpParam.devtype = CheckerDevType::DEV_TYPE_910B; + + Checker checker; + HcclResult ret; + checker.CloseRankMemCheck(); + checker.EnableTaskPrint(); + ret = checker.Check(checkerOpParam, topoMeta); + EXPECT_EQ(ret, HcclResult::HCCL_SUCCESS); +} + +TEST_F(AllReduceTest, allreduce_contest_test_910b_1g_fp32) +{ + // 拓扑:单机 8 卡 + RankTable_For_LLT gen; + TopoMeta topoMeta; + gen.GenTopoMeta(topoMeta, 1, 1, 8); + + // 1g + fp32 + u64 size = 1 * 1024 * 1024 * 1024; + auto dataType = CheckerDataType::DATA_TYPE_FP32; + CheckerOpParam checkerOpParam; + checkerOpParam.opType = CheckerOpType::ALLREDUCE; + checkerOpParam.tag = "AllReduce"; + checkerOpParam.opMode = CheckerOpMode::OPBASE; + checkerOpParam.DataDes.count = size / SIZE_TABLE[dataType]; + checkerOpParam.DataDes.dataType = dataType; + checkerOpParam.devtype = CheckerDevType::DEV_TYPE_910B; + + Checker checker; + HcclResult ret; + checker.CloseRankMemCheck(); + checker.EnableTaskPrint(); + ret = checker.Check(checkerOpParam, topoMeta); + 
EXPECT_EQ(ret, HcclResult::HCCL_SUCCESS); +} + +TEST_F(AllReduceTest, allreduce_contest_test_910b_4g_fp32) +{ + // 拓扑:单机 8 卡 + RankTable_For_LLT gen; + TopoMeta topoMeta; + gen.GenTopoMeta(topoMeta, 1, 1, 8); + + // 4g + fp32 + u64 size = 4 * 1024 * 1024 * 1024; + auto dataType = CheckerDataType::DATA_TYPE_FP32; + CheckerOpParam checkerOpParam; + checkerOpParam.opType = CheckerOpType::ALLREDUCE; + checkerOpParam.tag = "AllReduce"; + checkerOpParam.opMode = CheckerOpMode::OPBASE; + checkerOpParam.DataDes.count = size / SIZE_TABLE[dataType]; + checkerOpParam.DataDes.dataType = dataType; + checkerOpParam.devtype = CheckerDevType::DEV_TYPE_910B; + + Checker checker; + HcclResult ret; + checker.CloseRankMemCheck(); + checker.EnableTaskPrint(); + ret = checker.Check(checkerOpParam, topoMeta); + EXPECT_EQ(ret, HcclResult::HCCL_SUCCESS); +} -- Gitee From 0de11bc67d74e56eca10eb633ad2906ac3290c83 Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Fri, 18 Jul 2025 14:20:48 +0000 Subject: [PATCH 03/11] =?UTF-8?q?!71=20[HCCL=E7=AB=9E=E8=B5=9B]=20?= =?UTF-8?q?=E6=9B=B4=E6=96=B0=E5=8F=82=E8=B5=9B=E6=8C=87=E5=AF=BC=E6=96=87?= =?UTF-8?q?=E6=A1=A3=20*=20docs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- contest.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/contest.md b/contest.md index 26b6e9f..ed14660 100644 --- a/contest.md +++ b/contest.md @@ -13,7 +13,8 @@ HCCL 资料: -- [昇腾社区官网][1腾社区][2] +- [昇腾社区官网][1] +- [HCCL主页——昇腾社区][2] - [HCCL概述——昇腾社区][3] - [集合通信原语——昇腾社区][4] - [HCCL代码仓][5] -- Gitee From 66d90510a04e37122036bac7fac5e4f43d23ea70 Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Sun, 20 Jul 2025 10:58:27 +0000 Subject: [PATCH 04/11] =?UTF-8?q?!72=20[HCCL=E7=AB=9E=E8=B5=9B]=20?= =?UTF-8?q?=E6=9B=B4=E6=96=B0=E8=B5=9B=E9=A2=98=E8=B5=84=E6=96=99=20Merge?= =?UTF-8?q?=20pull=20request=20!72=20from=20Yuanhao=20Ji/contest-4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
Dockerfile | 142 ++++++------------ contest.md | 48 +++--- .../operator/custom_all_reduce_operator.cc | 10 +- submit.sh | 2 +- 4 files changed, 76 insertions(+), 126 deletions(-) diff --git a/Dockerfile b/Dockerfile index ae431ab..f853e97 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,11 +1,20 @@ # NOTE: Building this image requires docker version >= 18.0 -ARG TARGETPLATFORM=linux/arm64 ARG BASE_IMAGE=ubuntu:22.04 -ARG PYTHON_VERSION=3.10 -# 阶段 1:安装依赖 -FROM ${BASE_IMAGE} AS base +FROM ${BASE_IMAGE} AS official + +ARG TARGETPLATFORM=linux/arm64 + +ENV USER_PASSWD=change_me + +SHELL [ "/bin/bash", "-c" ] + +RUN cp /etc/apt/sources.list /etc/apt/sources.list.backup && \ + case ${TARGETPLATFORM} in \ + "linux/arm64") sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list ;; \ + *) sed -i 's|archive.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list ;; \ + esac RUN apt-get update \ && apt-get install --no-install-recommends -y \ @@ -20,6 +29,13 @@ RUN apt-get update \ g++ \ make \ cmake \ + python3 \ + python3-pip \ + gdb \ + vim \ + file \ + man \ + sudo \ zlib1g \ openssl \ unzip \ @@ -40,39 +56,25 @@ RUN apt-get update \ libgdbm-dev \ liblzma-dev \ libev-dev \ + openssh-server \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* \ && rm -rf /var/tmp/* \ && rm -rf /tmp/* -# 阶段 2:安装 Conda -FROM base AS conda-installer +# 创建 hccl 用户 +RUN groupadd -g 1000 hcclgroup && \ + useradd -u 1000 -g hcclgroup -ms /bin/bash hccluser && \ + usermod -aG sudo hccluser && \ + echo "hccluser ALL=(ALL) NOPASSWD:/usr/bin/apt-get,/usr/bin/apt" >> /etc/sudoers -ARG TARGETPLATFORM -ARG PYTHON_VERSION - -RUN case ${TARGETPLATFORM} in \ - "linux/arm64") ARCH=aarch64 ;; \ - *) ARCH=x86_64 ;; \ - esac && \ - curl -fsSL -o /tmp/miniconda.sh -O "https://mirrors.tuna.tsinghua.edu.cn/anaconda/miniconda/Miniconda3-latest-Linux-${ARCH}.sh" - -RUN chmod +x /tmp/miniconda.sh && \ - bash /tmp/miniconda.sh -b -p /opt/conda && \ - rm /tmp/miniconda.sh && \ - 
/opt/conda/bin/conda install -y python=${PYTHON_VERSION} && \ - /opt/conda/bin/conda clean -ya - -# 阶段 3:安装 CANN 8.2.RC1.alpha003 -FROM conda-installer AS cann-installer - -ARG TARGETPLATFORM -ENV PATH=/opt/conda/bin:${PATH} +USER hccluser RUN pip install --no-cache-dir -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple \ attrs cython numpy==1.24.0 decorator sympy cffi pyyaml pathlib2 \ psutil protobuf==3.20 scipy requests absl-py +# 安装 CANN 8.2.RC1.alpha003 RUN case ${TARGETPLATFORM} in \ "linux/arm64") ARCH=aarch64 ;; \ *) ARCH=x86_64 ;; \ @@ -82,112 +84,58 @@ RUN case ${TARGETPLATFORM} in \ CANN_COMMUNITY_SDK_URL="https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C22B800TP052/Ascend-cann-communitysdk_8.2.RC1.alpha003_linux-${ARCH}.run" && \ curl -fsSL -o /tmp/Ascend-cann-communitysdk.run -O "${CANN_COMMUNITY_SDK_URL}" -# 安装 CANN Toolkit RUN chmod +x /tmp/Ascend-cann-toolkit.run && \ /tmp/Ascend-cann-toolkit.run --quiet --install && \ rm /tmp/Ascend-cann-toolkit.run -# 安装 Community SDK RUN chmod +x /tmp/Ascend-cann-communitysdk.run && \ /tmp/Ascend-cann-communitysdk.run --quiet --full && \ rm /tmp/Ascend-cann-communitysdk.run -# 阶段 4:下载 HCCL 仓库及其依赖 -FROM cann-installer AS hccl-installer - -WORKDIR /workspace - +# 安装 HCCL 依赖 RUN curl -fsSL -o /tmp/include.zip -O https://github.com/nlohmann/json/releases/download/v3.11.2/include.zip && \ - unzip -d /workspace/nlohmann_json /tmp/include.zip && \ + unzip -d ${HOME}/nlohmann_json /tmp/include.zip && \ rm /tmp/include.zip -# 安装 MPI RUN curl -fsSL -o /tmp/mpich.tar.gz -O https://www.mpich.org/static/downloads/3.2.1/mpich-3.2.1.tar.gz && \ - tar -zxf /tmp/mpich.tar.gz -C /workspace && \ - cd /workspace/mpich-3.2.1 && \ - ./configure --disable-fortran --prefix=/workspace/mpich --with-device=ch3:nemesis && \ + tar -zxf /tmp/mpich.tar.gz -C /tmp && \ + cd /tmp/mpich-3.2.1 && \ + ./configure --disable-fortran --prefix=${HOME}/mpich --with-device=ch3:nemesis && \ make && make install && \ 
- rm -r /workspace/mpich-3.2.1 && \ + rm -r /tmp/mpich-3.2.1 && \ rm /tmp/mpich.tar.gz # 设置环境变量 RUN \ - # Conda 环境变量 - echo 'export PATH=/opt/conda/bin:${PATH}' >> /root/.bashrc && \ # NPU 驱动环境变量 - echo 'export LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64/driver:${LD_LIBRARY_PATH}' >> /root/.bashrc && \ - echo 'export LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64/common:${LD_LIBRARY_PATH}' >> /root/.bashrc && \ + echo 'export LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64/driver:${LD_LIBRARY_PATH}' >> ${HOME}/.bashrc && \ + echo 'export LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64/common:${LD_LIBRARY_PATH}' >> ${HOME}/.bashrc && \ # CANN Toolkit 环境变量 - echo 'source /usr/local/Ascend/ascend-toolkit/set_env.sh' >> /root/.bashrc && \ + echo 'source ${HOME}/Ascend/ascend-toolkit/set_env.sh' >> ${HOME}/.bashrc && \ # MPICH 环境变量 - echo 'export PATH=/workspace/mpich/bin:${PATH}' >> /root/.bashrc && \ - echo 'export LD_LIBRARY_PATH=/workspace/mpich/lib:${LD_LIBRARY_PATH}' >> /root/.bashrc - -# 阶段 5:安装 SSH -FROM base AS ssh-installer + echo 'export PATH=${HOME}/mpich/bin:${PATH}' >> ${HOME}/.bashrc && \ + echo 'export LD_LIBRARY_PATH=${HOME}/mpich/lib:${LD_LIBRARY_PATH}' >> ${HOME}/.bashrc -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - openssh-server +USER root # SSH 配置 RUN echo "PasswordAuthentication yes" >> /etc/ssh/sshd_config && \ - echo "PermitRootLogin yes" >> /etc/ssh/sshd_config && \ + echo "PermitRootLogin no" >> /etc/ssh/sshd_config && \ echo "PermitUserEnvironment yes" >> /etc/ssh/sshd_config && \ echo "ClientAliveInterval 60" >> /etc/ssh/sshd_config && \ - echo "ClientAliveCountMax 3" >> /etc/ssh/sshd_config + echo "ClientAliveCountMax 3" >> /etc/ssh/sshd_config && \ + echo "AllowUsers hccluser" >> /etc/ssh/sshd_config # SSH 启动脚本 RUN echo '#!/bin/bash' > /start.sh && \ - echo 'if [ -n "${ROOT_PASSWD}" ]; then' >> /start.sh && \ - echo ' echo "root:${ROOT_PASSWD}" | chpasswd' >> /start.sh && \ + echo 'if [ -n "${USER_PASSWD}" ]; 
then' >> /start.sh && \ + echo ' echo "hccluser:${USER_PASSWD}" | chpasswd' >> /start.sh && \ echo 'fi' >> /start.sh && \ echo 'mkdir -p /var/run/sshd' >> /start.sh && \ echo 'ssh-keygen -A' >> /start.sh && \ echo '/usr/sbin/sshd -D -e' >> /start.sh && \ chmod +x /start.sh -# 最终阶段:安装运行所需依赖,复制前面阶段结果 -FROM ${BASE_IMAGE} AS official - -ENV ROOT_PASSWD=change_me - -SHELL [ "/bin/bash", "-c" ] - -RUN apt-get update \ - && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ - apt-transport-https \ - ca-certificates \ - bash \ - libc6 \ - libsqlite3-dev \ - git \ - gcc \ - g++ \ - gdb \ - make \ - cmake \ - file \ - vim \ - netcat \ - curl \ - wget \ - openssh-server \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* \ - && rm -rf /var/tmp/* \ - && rm -rf /tmp/* - -COPY --from=cann-installer /opt/conda /opt/conda -COPY --from=cann-installer /etc/Ascend /etc/Ascend -COPY --from=cann-installer /usr/local/Ascend /usr/local/Ascend -COPY --from=hccl-installer /root/.bashrc /root/.bashrc -COPY --from=hccl-installer /workspace /workspace -COPY --from=ssh-installer /etc/ssh/sshd_config /etc/ssh/sshd_config -COPY --from=ssh-installer /start.sh /start.sh - EXPOSE 22 -WORKDIR /workspace - CMD [ "/start.sh" ] diff --git a/contest.md b/contest.md index ed14660..d4a35e8 100644 --- a/contest.md +++ b/contest.md @@ -69,13 +69,13 @@ ssh root@ip -p port |-- /dev | |-- davinci1 # NPU1 | `-- davinci2 # NPU2 -|-- /etc/Ascend -| `-- ascend_cann_install.info # CANN 安装信息 |-- /usr/local/Ascend -| |-- ascend-toolkit # CANN Toolkit 安装目录 | `-- driver # NPU 驱动安装目录 -`-- /workspace - |-- cann-hccl # HCCL 代码仓,选手需自行下载 +|-- /home/hccluser/Ascend +| |-- ascend-toolkit # CANN Toolkit 安装目录 +| `-- ascend_cann_install.info # CANN 安装信息 +`-- /home/hccluser + |-- cann-hccl # HCCL 代码仓(选手需自行下载) |-- mpich # MPICH 安装目录 `-- nlohmann_json # nlohmann json inclue 目录 ``` @@ -102,7 +102,7 @@ ssh root@ip -p port > 【注意】选手只需下载 [ascend/cann-hccl](https://gitee.com/ascend/cann-hccl.git) 
代码仓即可,编译运行所需全部依赖已提前安装 ```bash -cd /workspace +cd /home/hccluser git clone https://gitee.com/ascend/cann-hccl.git -b r1.5.1 ``` @@ -130,17 +130,17 @@ git clone https://gitee.com/ascend/cann-hccl.git -b r1.5.1 编译所需的依赖项均已安装,在 HCCL 代码仓执行编译即可: ```bash -cd /workspace/cann-hccl +cd /home/hccluser/cann-hccl -bash build.sh --nlohmann_path /workspace/nlohmann_json/include +bash build.sh --nlohmann_path /home/hccluser/nlohmann_json/include ``` ## 6. 安装编译结果 -编译生成的 HCCL 软件包在 `/workspace/cann-hccl/output` 目录下: +编译生成的 HCCL 软件包在 `/home/hccluser/cann-hccl/output` 目录下: ```bash -cd /workspace/cann-hccl/output +cd /home/hccluser/cann-hccl/output ./CANN-hccl_alg-8.2.t12.0.b077-linux.aarch64.run ``` @@ -156,10 +156,10 @@ cd /workspace/cann-hccl/output 编译并执行算法分析器用例: ```bash -cd /workspace/cann-hccl +cd /home/hccluser/cann-hccl # 编译测试用例 -bash build.sh --nlohmann_path /workspace/nlohmann_json/include --test --open_hccl_test +bash build.sh --nlohmann_path /home/hccluser/nlohmann_json/include --test --open_hccl_test # 执行测试用例 ./build/test/open_hccl_test @@ -172,18 +172,18 @@ bash build.sh --nlohmann_path /workspace/nlohmann_json/include --test --open_hcc 基于 HCCL Test 工具在 NPU 设备上执行验证: ```bash -cd /usr/local/Ascend/ascend-toolkit/latest/tools/hccl_test +cd /home/hccluser/Ascend/ascend-toolkit/latest/tools/hccl_test # 编译 HCCL 性能测试工具 -make MPI_HOME=/workspace/mpich ASCEND_DIR=/usr/local/Ascend/ascend-toolkit/latest +make MPI_HOME=/home/hccluser/mpich ASCEND_DIR=/home/hccluser/Ascend/ascend-toolkit/latest # 执行 HCCL Test # 512K -mpirun -n 2 ./bin/all_reduce_test -b 512k -e 512k -d fp32 -o sum -p 2 +mpirun -n 4 ./bin/all_reduce_test -b 512k -e 512k -d fp32 -o sum -p 4 # 2M -mpirun -n 2 ./bin/all_reduce_test -b 2m -e 2m -d fp32 -o sum -p 2 +mpirun -n 4 ./bin/all_reduce_test -b 2m -e 2m -d fp32 -o sum -p 4 # 64M -mpirun -n 2 ./bin/all_reduce_test -b 64m -e 64m -d fp32 -o sum -p 2 +mpirun -n 4 ./bin/all_reduce_test -b 64m -e 64m -d fp32 -o sum -p 4 ``` > 工具详细说明可参考:[昇腾文档中心-HCCL 
性能测试工具使用指南](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/82RC1alpha003/devaids/hccltool/HCCLpertest_16_0001.html) @@ -193,7 +193,9 @@ mpirun -n 2 ./bin/all_reduce_test -b 64m -e 64m -d fp32 -o sum -p 2 执行下列脚本,将选手代码拷贝到 `/result` 目录下 ```bash -bash /workspace/cann-hccl/submit.sh +cd /home/hccluser/cann-hccl + +bash submit.sh ``` 该脚本将选手编写的定制算法文件拷贝至 `/result` 目录下,用于后续评测: @@ -242,7 +244,7 @@ export ASCEND_GLOBAL_LOG_LEVEL=1 # 0: debug, 1: info, 2: warn, 3: error 设置日志存储目录: ```bash -export ASCEND_PROCESS_LOG_PATH=/workspace/log # 默认为:$HOME/ascend/log +export ASCEND_PROCESS_LOG_PATH=/home/hccluser/log # 默认为:$HOME/ascend/log ``` 设置日志输出到控制台: @@ -266,14 +268,14 @@ export ASCEND_HOST_LOG_FILE_NUM=1000 > 【注意】选手本地开发编译 HCCL 代码时默认已开启 `-O0 -g` 编译选项,但最终评测时会开启 `-O3` ```bash -cd /usr/local/Ascend/ascend-toolkit/latest/tools/hccl_test +cd /home/hccluser/Ascend/ascend-toolkit/latest/tools/hccl_test # 512K -gdb --args mpirun -n 2 ./bin/all_reduce_test -b 512k -e 512k -d fp32 -o sum -p 2 +gdb --args mpirun -n 4 ./bin/all_reduce_test -b 512k -e 512k -d fp32 -o sum -p 4 # 2M -gdb --args mpirun -n 2 ./bin/all_reduce_test -b 2m -e 2m -d fp32 -o sum -p 2 +gdb --args mpirun -n 4 ./bin/all_reduce_test -b 2m -e 2m -d fp32 -o sum -p 4 # 64M -gdb --args mpirun -n 2 ./bin/all_reduce_test -b 64m -e 64m -d fp32 -o sum -p 2 +gdb --args mpirun -n 4 ./bin/all_reduce_test -b 64m -e 64m -d fp32 -o sum -p 4 ``` ### 10.3 Wrong answer 问题 diff --git a/src/domain/collective_communication/algorithm/impl/operator/custom_all_reduce_operator.cc b/src/domain/collective_communication/algorithm/impl/operator/custom_all_reduce_operator.cc index cd73e75..66dface 100644 --- a/src/domain/collective_communication/algorithm/impl/operator/custom_all_reduce_operator.cc +++ b/src/domain/collective_communication/algorithm/impl/operator/custom_all_reduce_operator.cc @@ -24,9 +24,9 @@ CustomAllReduceOperator::~CustomAllReduceOperator() {} HcclResult CustomAllReduceOperator::SelectAlg(const std::string &tag, 
const OpParam ¶m, std::string &algName, std::string &newTag) { - constexpr u64 HCCL_CONTEST_SMALL_COUNT_KB = 512 * 1024; // 512KB - constexpr u64 HCCL_CONTEST_MEDIUM_COUNT_KB = 2 * 1024 * 1024; // 2MB - constexpr u64 HCCL_CONTEST_HUGE_COUNT_KB = 64 * 1024 * 1024; // 64MB + constexpr u64 HCCL_CONTEST_SMALL_COUNT = 512 * 1024; // 512KB + constexpr u64 HCCL_CONTEST_MEDIUM_COUNT = 2 * 1024 * 1024; // 2MB + constexpr u64 HCCL_CONTEST_HUGE_COUNT = 64 * 1024 * 1024; // 64MB // 算法选择逻辑 // TODO: 选手可根据数据量大小选择合适的 Executor @@ -36,9 +36,9 @@ HcclResult CustomAllReduceOperator::SelectAlg(const std::string &tag, const OpPa u32 unitSize = SIZE_TABLE[param.DataDes.dataType]; u64 dataSize = param.DataDes.count * unitSize; // 单位:字节,三种数据量:512K/2M/64M - if (dataSize <= HCCL_CONTEST_SMALL_COUNT_KB) { + if (dataSize <= HCCL_CONTEST_SMALL_COUNT) { algName = "CustomSmallAllReduceMeshExecutor"; - } else if (dataSize <= HCCL_CONTEST_MEDIUM_COUNT_KB) { + } else if (dataSize <= HCCL_CONTEST_MEDIUM_COUNT) { algName = "CustomMediumAllReduceMeshExecutor"; } else { algName = "CustomHugeAllReduceMeshExecutor"; diff --git a/submit.sh b/submit.sh index 655e94b..b7b0d0a 100755 --- a/submit.sh +++ b/submit.sh @@ -12,7 +12,7 @@ error() { echo -e "${red}$*${none}" && exit 1; } warning() { echo -e "${yellow}$*${none}"; } info() { echo -e "${green}$*${none}"; } -src_dir="/workspace/cann-hccl" +src_dir="/home/hccluser/cann-hccl" dst_dir="/result" operator_dir="src/domain/collective_communication/algorithm/impl/operator" -- Gitee From cf6051654aaca24e2c0053f34e05c22aa2299df4 Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Mon, 21 Jul 2025 06:36:44 +0000 Subject: [PATCH 05/11] =?UTF-8?q?!73=20[HCCL=E7=AB=9E=E8=B5=9B]=20?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=B5=9B=E9=A2=98=E6=96=87=E6=A1=A3=20*=20up?= =?UTF-8?q?date=20doc?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- contest.md | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff 
--git a/contest.md b/contest.md index d4a35e8..6ee60ee 100644 --- a/contest.md +++ b/contest.md @@ -67,8 +67,10 @@ ssh root@ip -p port ``` |-- /dev -| |-- davinci1 # NPU1 -| `-- davinci2 # NPU2 +| |-- davinci0 # NPU1 +| |-- davinci1 # NPU2 +| |-- davinci2 # NPU3 +| `-- davinci3 # NPU4 |-- /usr/local/Ascend | `-- driver # NPU 驱动安装目录 |-- /home/hccluser/Ascend @@ -158,10 +160,12 @@ cd /home/hccluser/cann-hccl/output ```bash cd /home/hccluser/cann-hccl -# 编译测试用例 +# 编译测试用例,并自动执行 bash build.sh --nlohmann_path /home/hccluser/nlohmann_json/include --test --open_hccl_test -# 执行测试用例 +# 手动执行测试用例 +export BUILD_TEST_DIR="/home/hccluser/cann-hccl/build/test/" +export LD_LIBRARY_PATH="${BUILD_TEST_DIR}:${LD_LIBRARY_PATH}" ./build/test/open_hccl_test ``` @@ -265,17 +269,15 @@ export ASCEND_HOST_LOG_FILE_NUM=1000 使用 gdb 调试: -> 【注意】选手本地开发编译 HCCL 代码时默认已开启 `-O0 -g` 编译选项,但最终评测时会开启 `-O3` +> 【注意】编译算法分析器依赖的 HCCL 代码时默认已开启 `-O0 -g` 编译选项 ```bash -cd /home/hccluser/Ascend/ascend-toolkit/latest/tools/hccl_test +cd /home/hccluser/cann-hccl -# 512K -gdb --args mpirun -n 4 ./bin/all_reduce_test -b 512k -e 512k -d fp32 -o sum -p 4 -# 2M -gdb --args mpirun -n 4 ./bin/all_reduce_test -b 2m -e 2m -d fp32 -o sum -p 4 -# 64M -gdb --args mpirun -n 4 ./bin/all_reduce_test -b 64m -e 64m -d fp32 -o sum -p 4 +# 基于算法分析器调试 HCCL 定制算法 +export BUILD_TEST_DIR="/home/hccluser/cann-hccl/build/test/" +export LD_LIBRARY_PATH="${BUILD_TEST_DIR}:${LD_LIBRARY_PATH}" +gdb --args ./build/test/open_hccl_test ``` ### 10.3 Wrong answer 问题 -- Gitee From 867c82cb17e2b48ba2faa78c0e883994f6e724fb Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Wed, 23 Jul 2025 02:37:19 +0000 Subject: [PATCH 06/11] =?UTF-8?q?!74=20[HCCL=E7=AB=9E=E8=B5=9B]=20?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=B5=9B=E9=A2=98=E8=B5=84=E6=96=99=EF=BC=8C?= =?UTF-8?q?=E5=AE=8C=E5=96=84=E6=80=A7=E8=83=BD=E8=AF=84=E6=B5=8B=E5=91=BD?= =?UTF-8?q?=E4=BB=A4=20*=20docs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- contest.md | 
25 ++++++++++++++++--- .../algorithm/testcase/testcase_all_reduce.cc | 6 ++--- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/contest.md b/contest.md index 6ee60ee..2179668 100644 --- a/contest.md +++ b/contest.md @@ -183,14 +183,31 @@ make MPI_HOME=/home/hccluser/mpich ASCEND_DIR=/home/hccluser/Ascend/ascend-toolk # 执行 HCCL Test # 512K -mpirun -n 4 ./bin/all_reduce_test -b 512k -e 512k -d fp32 -o sum -p 4 +mpirun -np 4 taskset -c 0,2,4,6 ./bin/all_reduce_test -b 512k -e 512k -d fp32 -o sum -p 4 -w 100 -n 500 # 2M -mpirun -n 4 ./bin/all_reduce_test -b 2m -e 2m -d fp32 -o sum -p 4 +mpirun -np 4 taskset -c 0,2,4,6 ./bin/all_reduce_test -b 2m -e 2m -d fp32 -o sum -p 4 -w 100 -n 500 # 64M -mpirun -n 4 ./bin/all_reduce_test -b 64m -e 64m -d fp32 -o sum -p 4 +mpirun -np 4 taskset -c 0,2,4,6 ./bin/all_reduce_test -b 64m -e 64m -d fp32 -o sum -p 4 -w 100 -n 500 ``` -> 工具详细说明可参考:[昇腾文档中心-HCCL 性能测试工具使用指南](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/82RC1alpha003/devaids/hccltool/HCCLpertest_16_0001.html) +各参数解释如下,详细说明可参考:[昇腾文档中心-HCCL 性能测试工具使用指南][9] + +[9]: https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/82RC1alpha003/devaids/hccltool/HCCLpertest_16_0001.html + +```bash +mpirun -np 4 \ # MPI 进程数量 + taskset -c 0,2,4,6 \ # 将 MPI 进程绑定到 0,2,4,6 CPU 核(设置 CPU 亲和性,避免操作系统调度干扰,降低波动) + ./bin/all_reduce_test \ # 可执行文件路径 + -b 512k \ # 测试数据大小的最小值,单位:Byte + -e 512k \ # 测试数据大小的最大值,单位:Byte + -d fp32 \ # 测试数据的数据类型 + -o sum \ # Reduce 操作类型 + -p 4 \ # NPU 数量 + -w 100 \ # 预热迭代次数,不计入性能统计 + -n 500 # 迭代次数 +``` + +> 【注意】赛事工作组评测选手代码时会执行 10 次上述命令,取带宽的均值作为性能得分 ## 8. 
提交代码 diff --git a/test/algorithm/testcase/testcase_all_reduce.cc b/test/algorithm/testcase/testcase_all_reduce.cc index 9f87d77..e3f10e3 100644 --- a/test/algorithm/testcase/testcase_all_reduce.cc +++ b/test/algorithm/testcase/testcase_all_reduce.cc @@ -1865,7 +1865,7 @@ TEST_F(AllReduceTest, allreduce_contest_test_910b_4g_int8) gen.GenTopoMeta(topoMeta, 1, 1, 8); // 4g + int8 - u64 size = 4 * 1024 * 1024 * 1024; + u64 size = 4LLU * 1024 * 1024 * 1024; auto dataType = CheckerDataType::DATA_TYPE_INT8; CheckerOpParam checkerOpParam; checkerOpParam.opType = CheckerOpType::ALLREDUCE; @@ -1995,7 +1995,7 @@ TEST_F(AllReduceTest, allreduce_contest_test_910b_4g_fp16) gen.GenTopoMeta(topoMeta, 1, 1, 8); // 4g + fp16 - u64 size = 4 * 1024 * 1024 * 1024; + u64 size = 4LLU * 1024 * 1024 * 1024; auto dataType = CheckerDataType::DATA_TYPE_FP16; CheckerOpParam checkerOpParam; checkerOpParam.opType = CheckerOpType::ALLREDUCE; @@ -2125,7 +2125,7 @@ TEST_F(AllReduceTest, allreduce_contest_test_910b_4g_fp32) gen.GenTopoMeta(topoMeta, 1, 1, 8); // 4g + fp32 - u64 size = 4 * 1024 * 1024 * 1024; + u64 size = 4LLU * 1024 * 1024 * 1024; auto dataType = CheckerDataType::DATA_TYPE_FP32; CheckerOpParam checkerOpParam; checkerOpParam.opType = CheckerOpType::ALLREDUCE; -- Gitee From 56b95d255f4ae41ecc931d6798fdd4788c936045 Mon Sep 17 00:00:00 2001 From: Yuanhao Ji Date: Thu, 24 Jul 2025 11:36:25 +0000 Subject: [PATCH 07/11] =?UTF-8?q?!75=20[HCCL=E7=AB=9E=E8=B5=9B]=20?= =?UTF-8?q?=E6=96=B0=E5=A2=9E=E8=AF=84=E6=B5=8B=E8=84=9A=E6=9C=AC=E3=80=81?= =?UTF-8?q?profiling=20=E6=96=87=E6=A1=A3=20*=20eval?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- contest.md | 52 +++++++++++ eval.py | 264 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 316 insertions(+) create mode 100644 eval.py diff --git a/contest.md b/contest.md index 2179668..9a69ca4 100644 --- a/contest.md +++ b/contest.md @@ -151,6 +151,19 @@ cd 
/home/hccluser/cann-hccl/output ## 7. 测试代码 +> 【注意】选手可使用评测脚本进行验证: + +```bash +cd /home/hccluser/cann-hccl + +# 查看使用方法(脚本作用:解析测试工具输出的字符串) +python3 eval.py --help +# 执行算法分析器用例 +python3 eval.py --llt +# 执行 HCCLTest 工具用例(3 种数据量的用例各执行 10 次,每次执行间隔 5s) +python3 eval.py --hccltest -n 10 -i 5 +``` + ### 7.1 算法分析器验证 > 【注意】算法分析器能够在无昇腾 NPU 场景下离线测试算法逻辑,包括:死锁检测、资源校验、内存冲突校验等 @@ -209,6 +222,45 @@ mpirun -np 4 \ # MPI 进程数量 > 【注意】赛事工作组评测选手代码时会执行 10 次上述命令,取带宽的均值作为性能得分 +### 7.3 使用 Profiling 工具分析程序性能 + +> 【注意】开启 profiling 后性能会有所下降 + +1. 生成 profiling 数据 + +```bash +# 开启 Profiling 开关 +export HCCL_TEST_PROFILING=1 +export HCCL_TEST_PROFILING_PATH=/home/hccluser/prof + +# 执行 HCCLTest 用例 +# 会在 /home/hccluser/prof 目录下生成 4 个文件夹,对应每张 NPU 卡 +cd /home/hccluser/Ascend/ascend-toolkit/latest/tools/hccl_test +mpirun -np 4 taskset -c 0,2,4,6 ./bin/all_reduce_test -b 512k -e 512k -d fp32 -o sum -p 4 -w 100 -n 500 + +# 导出 Profiling 结果 +cd /home/hccluser/prof +msprof --export=on --output=./ + +# 把每张 NPU 的 Profiling 结果复制到 timeline 目录,包含 4 个 json 文件 +mkdir -p timeline +cp -i PROF*/mindstudio_profiler_output/msprof*.json timeline/ +``` + +2. 复制 profiling 结果到本地 + +在选手本地 PC 终端中使用 `scp` 命令将 profiling 结果复制到本地桌面: + +```bash +scp -P PORT hccluser@IP:/home/hccluser/prof/timeline/*.json ~/Desktop +``` + +3. 使用 Chrome 浏览器打开 profiling 结果 + +浏览器打开:`chrome://tracing`,将 json 文件拖拽到浏览器中,即可打开 + +使用方法:通过键盘上的快捷键(w:放大,s:缩小,a:左移,d:右移)进行查看 + ## 8. 
提交代码 执行下列脚本,将选手代码拷贝到 `/result` 目录下 diff --git a/eval.py b/eval.py new file mode 100644 index 0000000..5b85020 --- /dev/null +++ b/eval.py @@ -0,0 +1,264 @@ +import argparse +import subprocess +import csv +import time +import math +import os +import logging +import re + +from typing import List, Optional, Union, Dict, Tuple + + +# 日志 +logger = logging.getLogger("hccl_eval_logger") +logger.setLevel(logging.DEBUG) +# 日志文件打印 +file_fmt = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") +file_handler = logging.FileHandler("hccl_contest_eval.log") +file_handler.setLevel(logging.DEBUG) +file_handler.setFormatter(file_fmt) +# 控制台打印 +console_fmt = logging.Formatter("%(message)s") +console_handler = logging.StreamHandler() +console_handler.setLevel(logging.INFO) +console_handler.setFormatter(console_fmt) +logger.addHandler(file_handler) +logger.addHandler(console_handler) + +cmd_t = Union[List[str], str] + +ascend_home_path: str = os.getenv("ASCEND_HOME_PATH", default="") + + +def exec( + cmd: cmd_t, + /, + pwd: Optional[str] = None, + env: Optional[Dict[str, str]] = None, +) -> Tuple[int, str, str]: + """执行命令并获取输出""" + result = subprocess.run( + cmd, + cwd=pwd, + env=env, + shell=True, + check=False, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + return result.returncode, result.stdout, result.stderr + + +class HcclTestResult: + data_size: int = 0 # 数据大小(Bytes) + aveg_time: float = 0.0 # 平均时间(us) + alg_bandwidth: float = 0.0 # 算法带宽(GB/s) + check_result: str = "failed" # 检查结果 + + @property + def headers(self) -> List[str]: + return [ + "data_size(Bytes)", + "aveg_time(us)", + "alg_bandwidth(GB/s)", + "check_result", + ] + + def __str__(self): + return f"alg_bandwidth: {self.alg_bandwidth}, check_result: {self.check_result}" + + @classmethod + def parse(cls, output: str): + """ + 解析 HCCLTest 输出结果 + + 结果正确输出样例: + + $ mpirun -np 4 taskset -c 0,2,4,6 ./bin/all_reduce_test -b 512k -e 512k -d fp32 -o sum -p 4 -w 100 -n 500 + the minbytes is 
524288, maxbytes is 524288, iters is 500, warmup_iters is 100 + data_size(Bytes): | aveg_time(us): | alg_bandwidth(GB/s): | check_result: + 524288 | 102.29 | 5.12530 | success + + 结果错误输出样例: + + $ mpirun -np 4 taskset -c 0,2,4,6 ./bin/all_reduce_test -b 64m -e 64m -d fp32 -o sum -p 4 -w 100 -n 500 + the minbytes is 67108864, maxbytes is 67108864, iters is 500, warmup_iters is 100 + check buf[14783552] error, exp:8.000000, act:6.000000 + total err is 192 + rank id 0, check result failed, 67108864 | 3665.90 | 18.30623 | failed + data_size(Bytes): | aveg_time(us): | alg_bandwidth(GB/s): | check_result: + 67108864 | 3665.90 | 18.30623 | failed + """ + + headers = [ + "data_size(Bytes)", + "aveg_time(us)", + "alg_bandwidth(GB/s)", + "check_result", + ] + + lines = output.splitlines() + test_rst = HcclTestResult() + + def parse_line(line: str) -> HcclTestResult: + parts = [p.strip() for p in line.split("|")] + try: + rst = HcclTestResult() + rst.data_size = int(parts[0]) + rst.aveg_time = float(parts[1]) + rst.alg_bandwidth = float(parts[2]) + rst.check_result = parts[3] + except (ValueError, IndexError) as e: + logger.error("Failed to parse: %s", line) + logger.exception("Error: %s", e) + raise e + return rst + + for idx, line in enumerate(lines): + # 标题行 + if all(header in line for header in headers): + # 解析标题行的下一行 + assert idx < len(line) + test_rst = parse_line(lines[idx + 1]) + + # 结果错误,带宽设为 0,不得分 + failed_pos = line.find("check result failed") + if failed_pos >= 0: + logger.debug("Check result failed") + # 解析错误行结果 + last_comma_pos = line.find(",", failed_pos) + test_rst = parse_line(line[last_comma_pos + 1 :]) + test_rst.alg_bandwidth = 0.0 + return test_rst + + return test_rst + + +def eval_hccl_test( + *, + npus: int = 4, + iters: int = 10, + interval: int = 5, +): + """ + 评测 HCCLTest + + 分别执行 3 种数据量 10 次,取带宽均值: + mpirun -np 4 taskset -c 0,2,4,6 ./bin/all_reduce_test -b 512k -e 512k -d fp32 -o sum -p 4 -w 100 -n 500 + mpirun -np 4 taskset -c 0,2,4,6 
./bin/all_reduce_test -b 2m -e 2m -d fp32 -o sum -p 4 -w 100 -n 500 + mpirun -np 4 taskset -c 0,2,4,6 ./bin/all_reduce_test -b 64m -e 64m -d fp32 -o sum -p 4 -w 100 -n 500 + """ + + data_sizes = ["512k", "2m", "64m"] + pwd = os.path.join(ascend_home_path, "tools", "hccl_test") + + # 3 种数据量 + for size in data_sizes: + cores = ",".join(str(2 * i) for i in range(npus)) + cmd = f"mpirun -np {npus} taskset -c {cores} ./bin/all_reduce_test -b {size} -e {size} -d fp32 -o sum -p {npus} -w 100 -n 500" + + # 跑 10 次测试 + results: List[HcclTestResult] = [] + for i in range(iters): + logger.debug("[%s][%d/%d] Evaluating with cmd: %s", size, i + 1, iters, cmd) + # 执行命令 + _, output, _ = exec(cmd, pwd=pwd) + logger.debug("[%s][%d/%d] Output:\n%s", size, i + 1, iters, output) + # 解析输出 + rst = HcclTestResult.parse(output) + results.append(rst) + logger.info("[%s][%d/%d] %s", size, i + 1, iters, rst) + + if i < iters - 1 and interval > 0: + time.sleep(interval) + + total_bw = math.fsum(rst.alg_bandwidth for rst in results) + aveg_bw = total_bw / iters + logger.warning("Data size: %s, average bandwidth: %f(GB/s)", size, aveg_bw) + + +def eval_gtest(): + """ + 评测算法分析器用例,执行 5 种数据量、3 种数据类型共 15 个用例 + + 正确结果样例: + + [----------] 15 tests from AllReduceTest (503 ms total) + + [----------] Global test environment tear-down + [==========] 15 tests from 1 test suite ran. (503 ms total) + [ PASSED ] 15 tests. + + 错误结果样例: + + [----------] 15 tests from AllReduceTest (233 ms total) + + [----------] Global test environment tear-down + [==========] 15 tests from 1 test suite ran. (234 ms total) + [ PASSED ] 14 tests. 
+ [ FAILED ] 1 tests, listed below: + [ FAILED ] AllReduceTest.allreduce_contest_test_910b_512k_int8 + + 1 FAILED TESTS + """ + ld_library_path = os.getenv("LD_LIBRARY_PATH", "") + build_test_path = f"/home/hccluser/cann-hccl/build/test" + env = None + if build_test_path not in ld_library_path: + env = {"LD_LIBRARY_PATH": f"{build_test_path}:{ld_library_path}"} + + cmd = "./open_hccl_test" + logger.debug("Evaluating with cmd: %s", cmd) + _, output, _ = exec(cmd, env=env, pwd=build_test_path) + logger.debug("Output:\n%s", output) + + # 通过数量 + passed_match = re.search(r"\[ PASSED \] (\d+) tests?\.", output) + passed_count = int(passed_match.group(1)) if passed_match else 0 + + # 失败数量 + failed_match = re.search(r"\[ FAILED \] (\d+) tests?", output) + failed_count = int(failed_match.group(1)) if failed_match else 0 + + # 失败用例列表 + failed_tests = [] + if failed_count > 0: + failed_tests = set(re.findall(r"\[ FAILED \] (\w+\.\w+)", output)) + + logger.info("[ PASSED ] %d tests.", passed_count) + if failed_count > 0: + logger.info("[ FAILED ] %d tests, listed below:", failed_count) + for failed_test in failed_tests: + logger.info("[ FAILED ] %s", failed_test) + + +def parse_args(): + parser = argparse.ArgumentParser(description="Evaluation tool") + parser.add_argument("--llt", action="store_true", help="LLT tests") + parser.add_argument("--hccltest", action="store_true", help="HCCLTest tests") + parser.add_argument("-p", "--npus", type=int, default=4, help="HCCLTest tests - NPU count") + parser.add_argument("-n", "--iters", type=int, default=10, help="HCCLTest tests - iterations") + parser.add_argument("-i", "--interval", type=int, default=5, help="HCCLTest tests - interval") + return parser.parse_args() + + +def main(): + args = parse_args() + + if args.hccltest: + logger.info("Evaluating by HcclTest") + eval_hccl_test( + npus=args.npus, + iters=args.iters, + interval=args.interval, + ) + + if args.llt: + logger.info("Evaluating LLT tests") + eval_gtest() + + +if __name__ 
== "__main__": + main() -- Gitee From cd59d826f132d170888a3b648182031a3e773624 Mon Sep 17 00:00:00 2001 From: fangmiao Date: Fri, 25 Jul 2025 12:54:39 +0000 Subject: [PATCH 08/11] add final round question --- img/final_round_question.jpg | Bin 0 -> 80399 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 img/final_round_question.jpg diff --git a/img/final_round_question.jpg b/img/final_round_question.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a6bc6582edd81b2a8cc20ac9b2f97aa1c0e81e0d GIT binary patch literal 80399 zcmdqI2T;@Bw=Wt5r1xG!2dUDP76EA@B1lJwO0N+S2sMDxn}C3VfYLkCd+*YFFA2RT zC=elpo8NiooqOiaee=%$&b)bd?#_45d}p$=W_|YFS!?ajTKjh4b^}2DL{mo-fQN?% zc!m1_ZkGY-0Q@_5{&{eX0QV3Q6A}^-5RwuR-6bX`B_}5%B_pGtq@|{yq@g4uqh_S0 zp`&MDU?8VrVrHagrln_~|3?TOKJGUJgd~K7B=i(y6!iacyX^$f5aV^@bK&Fh0PfJ> z;nU#V_5y$a03IPu+kX`P_l9=|r{i5BViHm^+<-c2z#Tk%{5u5r|7eXH9fFw(u7##XFH9a#sH@~pB1V?OcZSU;v?H{1de*ZbYxJ3WG z`Ue*t0RR7j^&iOo2V68bTz3cv@Ck_i!G(9n2iNgw2ne|)@6xIo5WV)GrdLb+ zQPoAlD{Y8mc=K+Ol#x#c&X4*B+P@+DuK^4FUqbdD!2Ua~B>*`-9&YjQX#gO=&5N99 zA;AAJJ*T-jyno3h<7;k1Ll}9Io(SRN=ThcT8-TkyvIE1Y8u*r^Qru=GMT&-~n)8HY z=O4T?{h!6}Vtu^j3)RXY@UG!WIqzE2&rrdO60|Ogbm<_FZNSeo#TCwno%pmMr#(*g zmgb9puvVMbWPlOUu0O5V*#s*x>0b7JA!yRy!wp7X>IMWUrr6ZJ^;JosDSh??l?0Q{ zsQ}0R5G1X&V{Cp(QT9WWMkXKvXOcQ@NA? 
z=Z~A2n+MV;fyj|f@!D(W_`LX`#ODM?*M8u!_CGE+X$fuZm5R53aPXF29K){O^<#|e zE#RY~3ChS?daB|*g;0sL^|Jm>nftoe{U-j5>R_EC;oWGgJesLXh3F?Lyb7VK2&M%8 zh1s+l+FHhK3Q-I3S4%F}3f}vO@$0h4>1dQ5Z-_O2KVR^|)Qi`u^@Fn$gV4CCDEudLS%9o*`3_AV@d6Ml0 zD|PV;O1AS%;PJoC3*qB)$J0$rj`uJgXR8a04JrqxN?*87wPj}2udN=>SJh~|OT?HX zmFAcls?ftdr*>~bElX2oxg?i=y!w&nhJJ1qk{qwika!EAphFg7qkTTWkT9oPz`#9) zpO!;`JBgi$!XOJnZ|vbMfOn#(NVL-JT;zfHv0(JU6hbzYW#cVsFjlIp>;YFOF>i&l z-^8XN?qQ^7iIG4D<3P`sO3 zAA)FB96zCJ%&68@IwS9l??`L81^;Nb3^yQqt3|qn)uKLp5g)I6s^GMZ{x$R~lSL+FHD@&~ zu>QBQef0L9%4ln{d0AptnRO;dg|I&Zt|oKM-=v1P``?z7>1=SO%orEna~*r~xx~=B zm@zy3r-258rm6=0bG-iz`(03MfKt0SGQR1D($1o+F3VBuK}+l#?L~PT-UqAS>^u0| z<)MN)XnPqkBK0W?XqgwDjY@F|+5{>tkw<<#~?!yDchr}zatO<}_t3MmN61F9O_3a#9z7f{+J&-)7`iLdp zPZvD;jmqw8=ZtwC3ZZu7>9!)(qqQUqM=%q8c@?*qnYg+T8;{;)_+1%?<+Mb9*w~Fy z@`A4dwG+#JpPz*n#GR1e?Q);HE)3KJ|ASe^{lJ4+rw5e$Sbjw zcVrxDQuf*Ik9vaH6cdC+%25KzMFAG*Vn~s?3v!@)r~o~-+jC_XHWwkITV_>TT;-m+ zwx;nJ_{rYHAT{W#>=>HH1~t&zbo|~3l^rcZfW%m2_Jw~%K*SMUD5|Vekpd1I2RmVE z+9y|@O$9?GhhVyjbEM{E7o?qQcu0b0wKjX=aVFF02_6211wMj3S zPBQpYA{W1vauN>`MW2-$?sne-7;gb~hZN2nHii@D*M6FO#YDVY`zeGMYH0v z8&w>bxLpxV)rDn8L1KsNF`M~#o_9iEdB`WNYZqM=@1V@P<@G};adv8_jvM8Ug zExkOu2BXz(0W~0}Y_w_inE=dY0cL*-sJPB{X;{U!TxR?GtnDKEJcIb=RhWmaANJVT zAagB$q8e*zDG>W6s*Y16Hc~p+pA{VHMSuMo_pJmYF{U=2`9Txy-1W$|^^YP4ry zX6pK63E5Y(GiCKLOY&g4(`t;q1%qw8F z)jyl3#k|bYjq2K-^XXL#dd3s;>ku`6@}HN5-vWS+V6t0)J&YWk-P>??`SDNWNtU}U z%~wiR=0qc*tyBl&fi`b21t#!Ez;ooY6aaSL$1Y-{Ea3s^_tUs%mnbT|PiF)xtv zQA|DZ?;hw=cX2rpt_Xb(D4uKxGqU5uJ8S$K0n#?@>!=m$y9M+oEQ@r4DgKn3oa$B~ zk3TMD75mtUXrfr?JWPaDyNp6$$5kAMN)9|9eN$Fvn_Yr-O)0f#MD@Tn25Kr}?rs%! 
zgSq9Kb04L&sP4e)sn)!vmJFXLbo8 zq2AHwKs;5H7M$w19C?kKtmq)21FX!nm!90NVd}$zl-I8>{5y*V#}>K2Q)CzB2h^c# z;3?yuR9riY=N4%!P$0-CG`fsA8G;si`tYg%CT~PJ-Ue z+vj{R5hM-`KQ(MH`hC13H191j`@^&4E&I$7whptU&VH^fp-MZSUj-6_*T+v)mm<5nXbOUrQaIOJik3TS4dHp6OOcF}Im`sBLj z#Y@WtO@#V(-`jWtOCSC`W;RVN*$uzUrkIT+XZ4Q?-g|pv=}m--MtEO+1?+v z{N-q}m&#%j?=h-0AKe$27f6iODReUDf8Q`&{&fXA2qKmNGo#15vIMVXuI{$M2eWSa zD#Ht^YiC|3F>gMf)p209aV}hs7$B>}(0!yno{wq(0#Md^9klbS$T=9q6;> zVN)Xv(cvT7 zNiZoi=AsGR0EaB@BD8u-Z(KS>JmC=fRjB>69xk?bK4!#fySI-p4=#N_yG%7zRufk7 zIp<79xm-k|BD#QKOGJ^U=i}tilU-Aka(sLPClhk^Z*BP9M(WkVlZpzmr+Es6BYDk7 zG&KRWc;^fZe{TT@2wLc}mqMa9i+lckN7yH480jC~`Kf|gRth)r`0DDRfcX!4_j?Gq zTvLhEg#wPmMUIa}2AVl@eRB7woa^P9FNzD}o~&!7!-P>AovS=uTsq#xuSjeSsR-^N zf)#{?a)L?!Kk8iVb=O`geGQpn9hq!eyN{M_pT@esScu&MTKwhjcnzXMHy^?h+8>~E zdrgu;YZlFa)Kf;3Fgmf*%$NA=i5xD8WL0ox`ul76`_?RU+AoZh>mDQ-yjwl%Yg3fV z0E+e0@RjtD zyL}*9uXXx3Lk$-_!&4qz?ahTrO`{gyV1@1K_51yDo3elS8PdBZ^p@n>)1A;9?+~JM z&cgXV;wE|D$_3JW^mGd5^N#|CE3uu{fTp&pnO5oI!?ineZpTwgy6cqIzCcG(farm0 z56AbUd{w1S`m!IsW4kOm!dxj*V^H%0>B@&?F?t_yE+H{^YipZt&AKk zXUkKEXL1Sj4m1%pUMp#rpY6~?@`cX^K0u`}^nOmXGo$Cba27gNNfmJjV$im0Zk-Xg z9E%zRD%k#9ZM@6(p)r{4Epx>SoFzu;kK?1WjgcvB?S;krD&&(qJP-=YU!x3Li$r7M z`oE5(_+F9SC&hbdI`i3BiJwG6V*rBoBn{9=pDGoeIjr4U_O3Q{Nxi5`u8+$1n)tiC zxqjqPfU%udk}PxL6zV|A{rGN%^GC}nX^h5h{oW<;PX+k*2bDg=nwlaLF5Vy=x=+LX z^gmR*$7$O0+WRWFdCoN_C2kscUQ$M^1|TKgB4p31JDf2tlVy-M&@!=6I+ZLA3nw+s zu8C>35Wg{OSv|SRdU&md9Zd%xQ+VKPd!WVH)PVYo%Dh}Q%?MM~H+N0z(N4U-oes=@ ztCbEBmq&tB{XkApqV!+6YE6s9SvvK(~P$$Crs2TK;GJFoRN0!dH1s?dg~PaE2iXG{DE3*wu! 
z{S!H1#$4x}2IyetlMwigba%6IL2|B{rIVjsj_+Fwi^sgt{D)x%omeIU>^3#&7&HC5Q)3~y-vKMa& zpY@UlhuJ-Upfq1+@WSw$aH-zQy>1&1X4b4}3VEi2RoChH=H_SPdQ`G#IwVo|dd=mB zXCL4TteW4VBq~qdPFri=l?S~ddSK8wbfTl;LUeR*W0Y)DC3*9d?7%NAEVsVtT5m3@ zaL{mVBlc6#FXLRbWnICMJc{oCm;EOqg5kMB{o=xt%Y!F{nL9EEvz4r8J0t9ta zr>7fqfif85o6oIIDA0gHR*s4+^n)sLw6~PMJ;H&G4ssR;x1+7Vb3^}qhKK*nh%p=a z?DXT$y;8z%mo&fKbO#7cebHgSm|qsiXFl<>;L30Cv&rYk_w3*D*&5ct8PCh~XjokL>%S>sR zRzHLhyf*OVx;AZ{)POwud`~NtOns<-vvh|cuwB43b-o%Lm^45BHYZx-MX*p#G~i$9 z!UYvk0?B^WW|@X^Aw6cnA6F;}CEzmC z+QEaIqy(D#6cv{GwOnJ{X^kUml{Xb3N7`?%W1k>GZvZEA>n3ly4vz+4Qs-f|nTET1 z?enr=GH3xfB8vxQJMpx5!%FV3FTC$~Hx+^2)b}6u5L+tgj&O>W!VhUO3yO9;^(qZ` zf($RoCN*Sxh>>l>*p&>qFc#-aGrTTnec<%&o%w}IXJ@>=>_P4S|4f69t~C=PJC0Lm z8sm$wV$!RfoEz6^3SZ@G(XrMcTk4S%bOAif03unZG9MbJi82<7^U~>(P&t_|a_(yc zT;Br7YWi;hzk7PtFA*^8LoJN>I6G{diVC(KbqhdmzrT6bahBz9v4f8d3ZlX!d@ri} z#&`1rqtBR=?!SUIkXW%3vmB-U*$>s12j%Mm2gVsky_9R+;8EhQ0K=qteVar-UhmjC z5S2@mm>hwQ%cy>1abj|cD${Ll^6M7MPTV(V0yy}~i+g|P4jS@er%B#|SnCRdCRBvy z#M&ocON3RHpP7!82m@FySr=Q+{BdTP-^-|zevI+Y^oazKm-KG)c6 zlJ8VB{&u1aUTzbY?8HZDKC`>6TpEwAxi)kPB1B8y0!Yfka-%9MDN_ms3dQnk$-He6 zCsuTnXK47qEnHQfIUfO%2z5}|8}4l=roPF8X1BFW!sewsLifBx@LPMqFiqya5q`U^ zdPrd3JdjFB1U~F0uQvyNI=eNylL}-3L;bW@G$(tj#@!r5^JrvR>T4gn8KjbT251s7 zWa>XG*P?s2U>9WSc2VEqdt{RTA)c15P-4x=%QJMMVSfqUq+ME^b1t|;vG675VpzCh;&gaMtp(5 zuVMW_8e+Tq0}jH}$!5A7TXu-9R3p*1gOb~&6xoazyq3nB?->0+P{g^|3oy#6BceBp zr!L;8wl3yc-7x8jC{rzWfZ;Qv2Z6`$!BpId;WfBH$#cD(89Lwh4jiC1t>oa7cgSST z)EX^xf&u00y{YNFc)qeZl(gg7%JKw>e!x!Jg?@OgwT{t6QH7Vu>XV!A)n|z!kFta& z#AR+At~J(1ISIX5^#vr|#KpgNnxv5YW*YfLv5;1cURvLj=*=caJ9i)IW%owU|3t?h zK5djUiB5lfUzO*j8+C2yzNw_U!&0nw$fzf769f^jjv`hCkJm}Kk>ZhSz>H_8GQa%M z6pV^kGfZUIm+_vRyd8%-`>IvR)5lw{{#FO$9jqy%n-EmQ0Y=kGviVS6BUEPQT#Q^U zpC4bU!AcL0KKrX0tJTg7V$bdq*Uj%0S{LCrKLT`+9dKA!fR0;k&&KevD*BmLL6$XuMXcUZVe4q&J%c3q0dC~ycKVqZ+$1=Bo7)}~=*anbF;zO%!kp^RS$-0JH z8Rp4sokqH?Klg6yrG;yf$iNN_bI&VXEYSC-)}A2!O5q|W&UGN#Y41-ssq?=%37_8_SUJDR8b(c>PZ_4$B&LjMzdgNo|N7(s)OItPpw*#t*QWm%tbkMJTz!b-|DJ*7}r(Nlw4_ 
z#_dY>JQri!II-I&?(!N0ZUs)Mm%dStev^wv))1KA;S7}60C&;kVvG~|MK*2$)M#Ga z$v^jGIOKZzta;tuU=2in4)NY)^3kDab%D?hunu2ly)k$ojyI16T_jJns>8>l*0^VD z^>n+_F2R&eFZUkH(Aa>cM&?j(UCsTZ%rK0eK{Y* z(umTGBVZj^rkJ62z1C>!I^(5&skOU&Vk9ki%3S(W{aZ_RNb_!t3~zWK{_ zi!m<_HMFH~0-3r$L9LkzB|QJlOqwIv zVlPXWII%&mY4qIOD2N|hfvoM2Jy(j9G8F~$>RbWe5nPaqla{yOE+ zZjGBPnislErSIc1=Er=tcz!-FlpRz>3ie*fcMrK@B#_>)2&PFiw5xtiJ#iOY+N)R~P|ASb_-~BmBC>(}TkS+Brrzr!Se`V(gYBuE-i;_MO-zZJ3i34U*?qE-9V7N)(bmi&^#DjxFJD-jxa)j`}&ZR zSz^y%kwJHB0#%q9XFn^q{}BJmrX%u_%;c`(SC&voH?$~#sXx1xXaGV2P3xs=2-)Cd z2){I|nidzAxoe!IU48jGhoczgs zbBqRN<~LuQE-x5qEVYzAjx^BFQRc$vWb|o^?-+aWMQCh-SkQ$RHYoaa8jMW|Gy={S zq%uh{5XCc%qd)cH!p%i1E!Myw-NsM^d^hQVk<7(+_qP+jaQx``=_1($cuuCxr9X8| zVM-QE-dR5fUOdj(ONV_FNPPKgEZb$$!MDIjvY&Lw+q02{ceSbr|0Eh7#2cWI+~c_f z_3ycuJXCo&n;4&&U&E2e#U;Bs#6IB0!t3evMonPX>jf?(BcFx?ugEZuZUH?B2q5-Z zCEZOf$RShF5H%kB{#Y80Z9*K&II$Mb$XY5(KgLDAQV);fN?tm1a+?P|5^P<{a{rz1 znLe&Q*nb=s3OP#PL5BzCvk0d!;G9Dh;i~%IyA9Kj?_X5r7Vi@$wea#H2hpz+XKd&5 zOou^~G7{`oxWsY&2ZWeBLJBH8{j9MqF2E83UnoD85G$y;1}<(-K5~p7XnesQTgt;| zRo2oMNVL&Gd!Qm{<*s`}8c4~eY>4O{KNej+<&OM#tvRP5RDsaT-60QoDM<+cY{DE* zW3C#y^zphX_pMo)P`AXcl0Z4+kYgMI=bbo z6_AzTwWd?&V)|y;y$Ua!PMr$JeFzIi)fRUf(_A40mR#oANglCG#$y*IXL>Mglzz`G zAOcnk3H<{KYk!C`uBhYxZSLZIX0F|!FBj#)HmK3J;zL*US=+3SZo9g;DdZ8}kvzE8 zh!rvbqQt1AkDIpWj!JExRF=02mi*O+)+`RtY21&>xt3(2O-u9Cj2K=O-rcF&kXVvd zoe!cvR1u#!XLDJsWh>2ado$i+a&{oS$b4cgV_N@V^sD8Y(x1E`O=GhJ@oZ1gmWAk+ zi_EEWWorpKbP@%bdb2dFTf@$iGR^MY^n$JfMah>pb=kvb1#b^`1}h@1hP4<6_SZ@N zXrifni_kGW%{Lv9OT#9Uds3y9Sq03OEX+CKZ}D-Ay)@c>cF$kkdoky`&X{~}mNUd(m<7kKIHFGiBD4WRpG&*#{_ZP)&g_HBMDg%*O z(_LA(WsR)OWW5FSS#!-{o?|(yW=>gNW7k{Km*(bl%^0^a`>gjK53#~>`$v|Q7S8aF zng#nooGNLrWWga>a;%BrlJ74XngPMOwGbi75=(o}*saaAc288taRSP;$8YA{<8+3G zh`d=ZOPxBAQ&Q!+xR2iRX^az^ULwcNV{gOZyx%zFKwwL zz{j5rLTxz;&ueW!jUd@%tExr$;!;wkkKgc4nvMl>NRJ8Y>FpeztmWJz)Yc`dopIe!__!oimBb_PI>W`CdXeQvKGCa+99p4bkIuY=RhFg6Po72rAlDvjt0Ngzluc7h`oa{G{9v zA;hW|s+K4d)#Q;`q)8&kOkg zu~y%3_-4HRv4i!`y#RSQkWwXK(?##Oxy9mxHl%oi8^OECS#95~8nZ-IBJmM=uQ4I~ 
zI%1Zt^^-zwls`VYc8fu8VO1sQhe@qon%n*jU-ju-H zlOT7zs!aaTa1me1`Sl`!)&;i68d#6cPaa9%9m;ULnVXPfPi2qJM?N_NGD15L;Mm&z zpvmtCOSN+8Zc`7S6;pFd4{Wof9thdfX%fR*GkZes1u39)%~2NO7_y@E~8r6fN_HLsPLFPr}X#^I6C8b z)AXy^l+y}pv4~&cnpsEeggV@Dww+o4fBoLMc4YR)0B6Yiz&FDhft;H?ob<>6+=gp= z0ur+P_EMMdv!STQil|2fn@zmzwuR{)u}!4SuDx|z^j$+^1HyKXb31SDOy zmM&Tu{ZPEh^gOyZ_ydP$u!esebOr@ohCd}Mb!xUdjro&`#6o14PgGOKuuChxnk($d z?C_`Vk<1ZhtIyLuW$nTpVr3;b(R6zAUV(RDUMn;hD|9$t`h(@`XUp3@fLp+z-1uB`_7d=- z2YK14{I2J5vVPON*H>>{Pd9vDH8M|(<-+SP7Y67=({dHL0BTgnNAUm;A20Z2!=s$z zb49bbEl8}Wdn{RMXJ@}>yvR!x0ea_k@A%f?FzJ`x;Aag#lL`&4tDju%SI=8#UU3Fq zT!a5!MPRWK=*j>4l?qjI3m}C#OMHaU2MOrFA0zqWQy7LL|Fj&&DKK7J`cOuHjm=sV zT=~!!$O{U_&^ZIrdK6fz_Ocfz89|Sj1lkzVyIs;Hr)SVd3dih za#>@`}vtIud6*BWte(0!W{5j_yVXum@z31z^dfIiY|Yn_l6fVdV!TAeg`Yecs8xuOM&5+|T!ydm znB@q_>&PtRN&Y;)honF>#^n9-E#bMT zS_+@Gyl8I$4X}-D&`tY1Ht6*&pd$t=6ZqfXW&gN>r~+6#sFOhCj)vS={%JVxEkLyx z2DgB!W5cYdVJiwitUz-|ZQ2=DQ2wgw7NFljg%(W5j-2I9p>J}%5@GdR+mu=u=(1@8 zWs17?gv>fs7ayy2rc*)paoc81$Qu(^yd-+^Z+PQ%V1_NDY^9Z!Y`gH>bHDrf{%56R z8uUgS|L;jg28J0N<6oB!5!Y$0mDuiW$^0oD#xZmxbjLyj0bzx887RrZ#ca)Vke8O# z>D{Xm=Pe*=mvp%dn4R-4Gvaa!RF=z+h79%XUrjHA&F~jp)*Bu`0BBfmJ1ZJeVO`Kr zx$Kx~W1M{dW!an$6WFmK`KjKc72%t2{lm1uGC zgw}K~Dfwmn!1*D~d=DqGVYl5L_J)1ONwp-(B%NBkq|R5tcz-AuHlnVb0#p)u{8G-h zRJg0>WY)>`5yP3Jr#eU@tK%j)iTQH%DI_}q7<1v@E{}P$`Um-mIAIGGRQPVyHY^8gdbO85! 
zfajwfiQmcX(J}DHZ^{vY%=aH~9njdqHfLbn{mASa3RZb6Ij+70QM%P}$7wEZW0XEa zn?$ZiM^aoL_+$k;QiG#$6=BzZI^fnL?@ny9<#9HI03C&r;QJPsx&}j16@cJ2PyTrbVsfD`{ZH`rbYWY#xoKcR+6WYI$Vu^XJBLjZTEB~47@!tpp7(2p3jJFRt1Su zSf>h3nXouG=!#Vaf??Lcg`qf>iVWBd`y;)#j=mp(tG}p>kV)XMa^d7amKMb2sh7Jn z$IS+dvB}onxtrm+8UDtElZlt_9AKh^GmTz?S!8Trs{MiIkq3#Z#P9!B%x=}B%G{An zAZNggY}eGH#sk(!@8P&`T!Ei!&97>2BiI=PO|%X}R<#P=0@O9*ZUG;NJJ2QnJ|}Vi z%n1z)Wqg+KvEnG;1w2sD%#!T^cGZP_n}>`imx1uJ+{3OxjELp6_&7G;_!e+Fg+t_8 zyF2)yjNsgs5@yCcJGm2L={ssPvsxV1Ft>AD$!m$DkL`_RfN%G+B3%ixDtrJ zGUH6nzSA~zeez7rzr{;%xpJxzLXPtf zJ`qX=#n6bJH6}gAYeW2F`MJ4g)lU>YePE8|%nO#jn)iozQPe(3w0OxQ@fzRcbDF5H zr3zcLw5x#6S6z;eAHV0N{LLRZom)+_gXy~JaO8Q5@Qzp8&FoUX>cn`*c5EN!@j zC0uo=rq3OMdABSd<95dn=O}LG@WU7J2#kk4OS$8cY{3wJ zHnvFscQAfl7Knv>#%BLYK{PN-$+(>h*}}xW^hl_(=eht3E8*!4jec{k^KTFWIx@>q z)aNFTAL0^4r3#2Y`<*gQUaoDqUw8|r34i}(EmU+jK5oR-3C*n2;LtA$Vip>nZIu>Yj_ z)+VCO9TbV>n6^({5|c2(acptlBmbg?m_MFvx7of9CQ_YiT!zG`@L>BUypnw!-ZgI* zH0 zU;G6Yg*nI#j-8$9z-MjMFpO=o)4>TwQbBPBYZZBojnJdI1>kSq0$3N2>j@ayO_mDo ziMnG&)6`Yto%mnd{Eo9xq-OUvXem{~+adq9C|)o;9Z2*V-;f}dFEtZ(*w$wQzFX9j zoxW=i&eG+GG_q0zk6cm7)PDG$UwQwkGv_~DKPgFX;?-#rsZmki9BFj%x-Kj$AyLIn z5X9%^Tjm6VfTA)Kp1m+?%nSRaTL4BhCeZJKL)vr(BL<4bSlJXhRyvN+oV>qz@g5bE zpuk_>1eUCxN@RUtu`qRl0m~ox!;gW_b=vPir+z=rl(}L3bw)*JZNAc#zw(ss`E;GO z1aE!nYKqPMrCKQ&&yAA77T|dWE(xxN()^L{oOadMRv6jJPj3MVCr}sxF0-m+12gyw za@~2Sl9-PT4<758`0@oWFC%nqfayDrlk^LYTO|;XC9+MT{_#7|XHza9Qs~%tS0NZB zlf0al?w%?$F1s0j|HRsN{?esZ$(bx^{eAY7un_+5O?Z1YA|`ujYPY+16GVkE%kJr* zs&k`UHkhiz{o7)oqZks7`dm-PYBtMSy!nwR2JE9Q_0WMGF*Yp}XXQpVU4Uu>a<$`P zC&S0D+33PguK9|;?lY$?Q~hrXfn-k?pNl#jrN0#tR?C-`Tk32lH+eb1D!Ryuj64@m zb!GQrfEV)DE#T#Jg%<^{;Z&{uoi%6HXmgp!q~(=r?Y)b+iJG7HA*n(J21Y*tROSE5 zY3+Y?Z0LzxI*AQ4+eDGz%9}!*w$2mncc6!lCy)M|!>FE#<%akC zEXG72`&ts6psQQu=J1^_%_Gvj(ai!?V9U}S#mcim{tcih4K^@P?>F)9=(;oXR?_4- z1}8|!0z1T7p8BcT*1{nobx2%go|ePdL+QrdauvuT$SjqOIs7Yp z*{e#&CV1I-{!?{XEbAk49>RwE1Y6&-^a9N_B+89X(YWBP`hLb)2Zj{9696y3+(m^a z6fQ3;ETQeE##Ra5b}<|sBo^%f&VVDiRp+_{-JjaCGvZY86AS!MpgQ9%47 
z(62U!ZWXpbvYspPZx#Ys$_m71rk5|2JW84q_dFUoa|KsSoqy&rVqZGb%Dxni4>lni z6+20WFlPNY+8Y~OMvQuE&}AgXlUs5JDl@ZwmUkLZNs>XjwyZooU*HOr4l`{x;b z9&H{aX7*;=JtXjtrOH>baF%KwhJIQt`Bp~kO{kK`9uA7cKQ9*hGYD}*wCb>1OPyFI z#~otF6l`Ywf|O=kba$l5{8MONZt?=XRSydP#^;`wC7t->uc(&vu=FBy4fQb%R=L=# z{j1qYo?8E&JB?$ih?F!Rj#4_)X|68=E_X6~#dRnrn3-tNDbqnY$Ub;$~I49)LtHlxi>@uQE-U~Fk9Nm;LQpb?%edvvFe0haB z#Kwza1ws%XF((yNGbXm+sua!^A?%+pKVy7&4XW$U zw;WEFZhi>3F1(8GH8o~Ezo$8~zm}@~8Mt$5=AF5nl z*w4wbEkMN2smQIYx)Owa|Cn=Zb`37|yT_(ql8tWM%PP=Ok3_cm2=}~Ha^_Zr#9~_v=Bs%IUv9z*! zsqPDRtPoF0MHo%2TEAL{KBTh?InAp3V@Hpzv+URc9mO)81>wYH%49Fb7$dQ_fTNam zGN<-ec-WyF9mWj>{QB45Owuqy5JuQU-#=kYH4F=~{;dJDEUW*Nr61xyjk7yh6gXOm ztuOnJBrdWM&Bom)nAb-(z~zcJAoR{lbA_em2UNZe6zS>?j>wP1XDSE_t+9X+?|tih zjmIbkXK{1ITxS+{Itq}}6GoV%BB_N?h3EADtW@_QkRD@-tPHK=pFYogapYq!DU@(@ zHguNs=%F!hRe1tAx>Oex|G7Ce!RumJz*|)!b!hP5ekX9MRep1~JAnZ@-;Z);l}o!A zR&`+4)UVY5j<7^A+v5&4T4)v=h1|7<7oj&N7Bt}3K-&~EV!Dw0G;~ zB@vivI3n(^!!W2te^`)*0e#*AIxT^oKDrvfl zb^FN2Bt}G(Nkr6yUv88&?}w`5<6yc*YKfqQ+&-#n5V|}-wpcZ{aADi+$$8Ap54Ss# z(2eC-rEi)qe+pHMG`*_Zu;xR}_q0Ib+exq^HzmilqnyhcbFbqql`VTzp2ZiqfqvD* z)_pZLG3FxiX=2=TAy+wVP_o&pWX4V{Nrbr@vVqB=g}r)|xLA%vATO2G&vH25sV~#S zKT>=v)N15SHg$i3{K-q^E@2iW%WG{5M{90Y&9IGZdgz(zcXfnmc28)}uZe!uRf?s@2?T>Pj=n?nsKUPGhqC36UQT=;v`gOpRog(A*fH&#|9Gv{P0pe~ z`r4}u`7K}a_#ZRj*9F1K(waAg7&-(nDvK6rpDA`cvnGth1o2MEz9ub?8=)n!co1Sw zOOpOT;1hRjK$VI%SM*}`W0{Oe{MMVWzUbhv!rb?f?9OUD;s zc6@Qe*`(++qylyTL7AsS0srf2+Rb@)(jJ+SC+^m_9d^E%pZ=sXZb~g6Vyk{QN&mSc zQa?Dr4%COs&@Sz2wM;ANA*`tSEGlw6J)K9=jr*qe)%9x|U8yS>T50-fZ#E(K@QVOI z%Xx`SP^5)VNF9iHVZFyoW=&wWKgND;u&ipttwP*KCzV+*b&5Q0fFIh$7a)&p*|2XY zf(Aj)?|F@r9@0Qv460CW^AW9hMNZ9a=e>KSd<_bp8>+?26-Fwhblhv0Zgy64UX{=8 zgD6f~J>csRLF~`1)3wvyX^82tB5VY^Op?AYQ;FBCTnWnJEdO zuC)nK^;d=Ah*_Eaj+!4;ajxQ9a!6jSc3#Qm?*6^7ON!yp$LWv{SVx1Td=;8(VLdHl|7{8H&4ZkaFCf??AYk*=uT&MpO2496uud z@#_@3*IfkEsvOQ~(Ow;B%~t8s1FZfMtmbH^<;>^!w;Z=yQ&+!7?8ZDdT+CS_0)tqRBhrbD*h=Z!v-@LM&6PmNM#3C*(Fm;j} z&HTBhh1rbo`9r?sU_t;iu=2RraX!ua5=g0JxwGu#=bg(ZhwN*kSRI+0EL9q(SG3TE 
z(=>km`go+v`1O}U-A#!l2?MlS!QMPp49QuUuT}2+X<=c}Ox?3|wU10nG~vBy`A+yf zRag>zs=f?WN;QiqJUaq}SG#-)JZeRLpE?IJsD9r+&0WVRH%eCAfYu6y~3$_90MyokK)f>CoJ`{H)e5Ci`9Nlx#x>G$T&ELKPYI%>4j z8d5hv*W59o_n&)J=8A1Q9U~GEP?j0VXtn%LiI>l&G_{_QaPD--c!59O0vN|uy^wr+x=m2ZfZbZ^? zT=zbW1vGH3S8{dhuA73PnuGh(B)Wz)Nb4g-FdMXepwpURCP1}V!(TQfHpsTP&`jT7 zk|3n_KJQ=;AFc7T$YQ-C89i!mw=X&Y-0=&boPVM(nheX1!BP!ac@$ z%_b*mdPN7}#KDJZ3lIK47%|K!ukkQhe>rpmBAeQBZTTkYWWch1DktRFD#hKqE&rob zS-a|I&8|*Lqfd`2ciErz&Jxz*TI;wWVs)qdUIge4!v&ZvNGa zlD{#WWE9Kywf|(|zi6C$@1nchCh%LWDHFdvvq(uyxe0st#WqO_w&Klseg=wdCCkHT z-F#f{xiZeI>}9Dv+{+D}qoCY(7jx&GF}9B?ci(PoiiRc@#Oq5AZ;prs_nLI{D)V9) zKyg9$7M+4f=Ikb0^)@I#TFB#uFeQ-kXvvBt%!$_`n@5_Xf^dfLX+AG^Jlpmwfbxg_ z^;%Gb5+R~Zx92J>wODpLc#^!*joys~JJaqj5}2Pyw4Ed;@#%gZ!wR$%cXd?}>a!_O zylEw`6^b3Z?)JmkL;t(Eth`SqpP&MB#447*`|^Bch@W~YP8w_a%XFOrV}aaP4G=4* z&EiDc;o}vKo(;K_4f8Jn)561ueTj?=$zGY{fvJLc(Q4~PD5}s}E4A@nocDBy!bk)h zyH4Ut0(L6BWaBH|$upP73<)Rph)vUciw0bI!ifeXC^2pj#%#(yKW^k&jX;!#@`_{C z%3$JvUCoK4x9 z)7yy}`c4+*o+SUy&fNTsM!G-%P*$mvsuoub8JX^$j}oy!K7MW`UY@s~>R0w#LE_Wo z%OMYT=R$A9f1~a_!ndyy5+>hfQ1we4fz{{u8aP@U#1l)-W~zR$=d2L~vL9DB|6_!rt;%lor!Mb$pkvXQW=P?tTeQ%vg6vK6#P#c~X*O7{)@p z#1$uu2?|EGS8~d|O;E+hT|9TpqAr75 zOI3oMMampnOsYnamx@~#7aDev9IA&ROCKK9Xs?-(CzJgN`zd@+yPaLIi&lK0n@H9; z(ZYk4PI1KuvF%v`b)BW&SJbOtH|4_Gb#*8#x~8ipJp-0@tQebZY@v~G4aWk43JSXi zNNA~^u+Jx!;Jdp9^U;Hn^o*Z{1;$*&78|1F%2#v3l-hbpIFtn*DW*b^-S1tG6x8&a zzvBX8O#>>?6^he~KkUZM)}PRKjG|Tf62Gb#sGMay>>}Y!t-kzzB$O%G)=7}V+`@2N z&lgPfoXf$_| z9sR0>;1k^Q%#y8lm6w$a0R=*ok`DRrE;==}cu=39UOsm#YY7j=(Q0S=owHDeuj|e@#?}bkVv73Br`j|7NI?cuxNDz>7iw!^ znml@(tySoJXxWw)C^;d39lNLS{S}|&o0ct08ef@ov!|WZ%?fJLLZ&`g(~C*5X4^7YFu@t)eEC*s zqReZT#e9rFTh?7B5$>bt&KTSc z#R`tv)w0WkeDmj0@8k?N+66i32juXn@H49;eAaB)JZFLwvvK4<3h_pd$7;(>_vC1W zwCa)$_V#SvhqI0~H9=Gi_%82)uQ`w4v)WJm@)}jMv!#BgF|(Jbrroc=FgE7a>-{80 zKVM`ezxK0e5$YuLaF0k1OT?(93P~f!L@{w}XiZK3di#b)B~-3M-M7&sf)4;y&S1j) z6(qiDSu7Z*l?S;2d*OU)WEvICppacBtFoc|5L6sCm#f6Finw=YVCOm*meWFy1ENy3 zB6z@v=+S<}kR!`EBwn(~%&c7Q=j4HXyFPoxYC1J!CgR?-Z=FQ@LWAc>vW2`w`WBrcFX 
zc`44neE}QdfV#h3Ug4NL-OaV~LB%a~;y^t*%{*_wHmLcPZlL=-FqeG#j{x=m>2qFw z$z)oTJ+=DTgT=eY)_x>=jxw*rMU-&aRE#$j`t!=1{J@5a?SDb#^Qq|D$R_0mJe#k8 zC;9SRRLQW+R8;yRn^@tZ3WSgDcT0Q2=6rtiZ_PB5iXJAIkn5bQI{M;)Ia2msaFD2L_cWmXDlf=;5*v5>% zDq7MCnRWYa5Xcomli?a;B!qI9lL6@HI$%wY!x_n>FAVSI%K+_H+b?EaG8Lx zh-(qsvP2NK2G_I{c6m@WpKrz4=oQ{lmFP4vG6EI%C{XTeeG1(C3!0P4PpQy<+DWa+ z;lE9-@%8?XY$ZNRj&FMpixqo4Ncv@5`YwAXVFWz1IQP-u33q1l;B@Zb<-uM5J|r?$ zBDp@ZD5mfJ5m`ev|LeA%&HyDXx^rb;@)_rZFY+SwRG<}`(4JpEDLw=^`J3cU@YBkL zJ@QO-Z@0e~03+N&7piAB-)`Lx$nm^7Yu}HGg`^5_Ddrss4t?>TkDS?>NbJE0mD21c zJ_c~DA^;kAYBXtWuxCm}TC+2VuH17&1S32kLy%8I!2~?`hjNiwOP0f+vxl%JC!a^v ze16nVR)v}k6~z&`AJXXC&oth0+HmB!mIY`~#1f@kx9#fgo+nwEcKQ}QN~%D->!AOo zpR&T;73?9Mt~AFWRloU$LWqbb=Hfbyk_}MADWnNiaK9(KhBN{XTMxw>*4uh)(em5P zJkzv@mZ{t2Rr7IrF?(~-K8gjv+_<}OrP4+kzNEiD|5^(V9?k33 z@~E~fZ)jUUV}R^9LY^Giep!rSBqy*xlS?2RD_SR-|WCw?FbflMK z92=D$B{xrU0*))C!AZuc2G<&dJ3v%7b;#$KI$?sCwW#HV@R;Qv(hCOcyyg8d44XAl z+rl?`IjpCd@`azdJbC5wB^W95`vI5c2@1OwSJ5xlEU}znMcrA@>n{_@hm~$loYCN* z#r@$hCM~gX-R4QPn)?muYx51_URUw8thspXU|7cu8TZ;05tY=^)|8z^otKX~YC680 zx#FW2kRzqbeRlnUoeAT;yGfoxgUAFl%6uFXs`I@HZ-t3t7qM3#k-F2&h2uJQ&Jf$B z-YCO-=#U)K5C2#(z*VvsH zh4JvshO3NauZ&N*gcty)7*s_jCfPw!a?}>hOJ5>q@e) zOPKtI!7tbzV+kvFkYaI8L#AAJoe@!CoN$Z3d^NuOl`}2DZdFGmz*WF#AUGxC7N23< zcu3>-D;3>wSp|S#Nr<{@UTQ><8)mm}HEEx|@T~TJS%SN>$XHljqK%4rwo&4A=D1Qx zlE&sE4x~|J7P?T~ij`I#FVnsItv2e5v&NzVLg)=2+I61cC95r^x3@6eB+d1(g`{h zeU4+i*$UU)-f|6}y#yh(>h?QFMad8HU?@|{%PEZ(sbQ1louj*_{8b)0j!kM zJlFw&5EPbL*(_mBYlHg!+ZJjXqulB_u^qN!)RB||kKR?L&J2i)-(&yYEOMc{X~{o^ z49oa6e`i=y#Kx$82)R$cMfG~ZNajPYq@BF7H{bA{zpoOqiYE!wBm6DMUyIR5q7tCb zwXyrL1d*kTPi{bh(_!Vrob;r`TASxS@7eqByf<+{zT45D&7%KVBmTce2r4GuRmfa> zr?Vm8y5|3Rgy>KVNvS;eosTt={D$f5{yfuQVdc{ z5&8`aiEdVdOIh$}Q*U}W`Bh|3kmbRQmpmKKJEAgyhXI1GGMSZX%>TE7HeOa#YN< zR*Mtn-X-feT@3T{uaxQG`yTz|)6K(|A8*|eub|^nH{nfWB&*A5d5)jE@D}^of!&5B zsA2~r^2e4u6e1!C%Kq*F&K{geY62qV$iB3sDWvP_oal|`^_CJltu@`9+X;Ybi`KPS z*T**7F->ofPkd^Fn=jNde72a?s4@6SpdBT@(K|A+hYvv9pZ~?ig_M1f$PYJ2SQ?*? 
zbMne4H(h@r;p^zm=kcEYZmO_(!aj|46B>&^OAuB6f=K>?8UQA5gWhoLWyy7Z8I zL@L(?rDyr#Z?r*2OfkPkv`QtaAI^*FYNyLg0@(f5=*1K5SU_dMRusH_^3REfUOahD zp-3}^z)JsfQk6+NN3;&xsV?Ziys!KT9Y!oHex-ZN2aX**2FmNsw_-a&?)SX_VkR%Rk6^i-(`FBlkZ4Mh+3n`B=zu%X-t}Y z;67e(3W6P$Kn!IRAr@U*|AJW52$Ihhjs%<_cn)^}(4I^mwt!3@!3V&{F&vi*O+>04 z?rZotNDRR^od(UcmMAwh7??aLucxXf7{! zPX+o2Z`E<_tjwa=$SVh_hPJJ5{M0P(^un~ItFw7xh{fSq+9YjzPpF4tXUhinG?Tr| z=n*}-n0Pv{kOm5@h`);+Jm#d6omAR_QAA(=O9!;( z^S_{P-qQDkq$e*abW>-)s7c$!O#KD160iscLwH8z*_qYF@n6tnSpxkpzdDlgy_8Dj zgqnogo`j2|uAK|Js3xJAm&w%)f&Bq>ME~4=k2T7KA;LP27cf5%%!3+LQH*_ZN+pS6`aus*$R^6Cr>fQ6M2$5)+u0!A6sIrz{7=gu)TV=? zUy=%POqar^HGgK0g#_wJ{N#$zt~8yy;NL?m6M#FHl5@i|+BHz=`PS;Jp$lZg^vn0< zQy(@U8-l?2&vNvF(`-ldL1>$xN)za?nsM1cAJx<0klUe3Y=T4)EGTo2jNWB&Ah`q5t`JEhgq$l0PPjwWRhoeU&EbnR%Zkye^o_|55 z8JWez?plY-BzV@*W(0v8N)$+%nQ;gI-Dd`_c5d7fpT-ls6@vG`%3HF&9Pj{~!8oxf)$e(1-9%&9_(wb6HPJSB{3%z+Cd4L~&YZz2_~miNC)fb(j`TX`uEhd{=Z)Sk)TA2>9bdKz*YnJ)uqitF*mPlaHEfu0)c5@ysH~n%5@$r~K;! zXK<+=(W+ygIMyYT?>5JBycQrRxC_BhmW}K^!Iqe+zsqm8Qs=eV;K(lZfcxOyNUx*H&kwYpc%t0;P0$hW)t`rHz z`%PvO__3D&Kn16F&EL9L-uz+kVfek@1AsBrX#dQ?{;dY8gV3LtFZYmnJ4qh9r*J4)bXEchK#F>x{v9c3yZB!U)QLq?c(R{X_^L@*g~sc!lO>|8A))bzZSwhwp-+|s2FbuB~aMO zos3Oxr_E6yupwnX>RGAF7rwDEdr!;F2a$Ka|BF;Pi=_(d9_yVc)-{>ReI?^rmK9zN zBWGbbse&ah%#lBDPBfD=oD`qa6xuR+{PkrTE4k80iX-&{tC95EWEf~v7TFrswl&U0 z9=zBz|2NlgI=w0$d-Vj35c^gvu})M(TJ@K{}gZw5^1f%aTjMh zD)QqL%NNy;TB%VT9DhMKw$kX0@o|Oq_4-BKoS2)*4Ap?SSCmU3W?M@c$9Pu80 zy>rr%72hhF%N3uHy;%e6{*~xBDzfsux3~xQFqI=C;KD82_li@V^a!btOJ#w>H}GBH z`IEuypNlSBhhiz(7YHSl8BmA8_h4{`ma`eoD*BjK?^F}-j^T(CGWA`q&3qZ`^4KgT z{K29P@0=BHFsnXi;u~%$?+YN5$MgQ=zV?}5mA8qmT|?z>_#Nk8CWP0zM~-ZxBgbjS zk;5%tUG0B_xmex^`zU*cdUpY@i%U{de1%=UxS_zHjqN9j3^#+9rftUc4aQy#A8T>6 zC-z8jm>|#AE)?@=tTkwB(aP@x-q*+UlIsM?Y)LVaoW)8xyVDQ{18-oxBRaImy8Y%W zeGsI^c*}Q_wH$iFuLT~+%q`R{&TG=U@II62e=o;8g=?v-u)=qDe7%|6^;K}Jww1=I z?wu(essztsqNOxKwOh7!tD>B?F4Lj2A_FcX9q}e!s$S#8-GmYL+hpCxx3$)HA1;0p z*)!eSSH^}HoPXIXUGw!Zk`{7rbpWw(53B}(NMFK9VvQ>0${YrEm|ncDdH^!ufPgps 
zEc1(-WdciDs?Hk=y2OSDI>}l5t;s%^P2VbaN z+k_mg^|x)VjXz`Jzt?)d!N3kAz7+AWon~=`W-xYA);6o%!p_yk9J?Q0rt`$i5cgV1 zOLm;FaxkyE{%hbegEEEE^F3z}*jWV>=)0}Rgi8oJLeOjzdGSerEoFdDhlImsDJ;PN z7x<-aqR-x2-YA)Go?riZs^+U3-M0)ck6wRx-ac$>MNq_N6>Lg4wQ!HN2yR%?4SP=+ zYO(wp-FlZcnPwWTe08q2u@^of(URq-)vn&ZidGX^Gn#v$L$QwfHD?VD2QjV!;-lMl(!iv7U&k(!Vkq{p9P zBw!^FN?1tS8XW|2o zfQ#chlz+7hM8RQiV9<8UJ45oyOMnGF@#aW|&ZHzqDW$NKudCM)DzaD7DfY>StV~VS zndjXu(o$mbOs@hi0AYXzUUw0C9VXUwu=(yzg@$*w#gwI$_#Y07jzc*mZ;sSR3$aYb z!TN@d?X^;wsUsbVKMU2`LAKbs+GP>LpO5qL&U%`X2`Z!VSA7;Q6#&;hD*O0>y`@!g zS;@Nxl*6oQ*J)NE?$L0ex?y>3OahrPKD2`2qd_vbiKg9qRJO?eX?xB)@uJyGW$=06a77&ET zny;_T8LP>&P`~93s2hpH6$Ov|33ZCkB;go z7U=^si$89?&7k)C)k&0Op_>*ejH8Zo47ZDgG%UD*Oq1qpap7#a6tTE>`WF;G46L~L zk1HngbWQzDv-*jLSzGmpvy+w;-&TUedMIK&wH&C=Y8oWUsr}2cWuglAMByQ{S+NvQ z`DHopsbv6{r3jND`Z_J(<(dHvHxkjVh?gP4w*J?0?Ixq{t`kDyqWxbE68}FBvhcqT z^1qk=^Ff%F0I3!$_FcVHoHTm>;*pt#Lyd(K!>nu6G}{;zS5R-X#25S3^0%+i0a$gb z-UVWg^6#v3yXE>Lg-6TVKXfe6`72@!NsLD~7mIh)>ZRU~rdGbSeSXg{r24iRSMAQK zeZ(~hPkS;RQjnx(%FVF z>L&HLy22mFuaD0SrQ-_CW;dlPa0wEQmzboohYH=2b7N`8Q?eNk%Qjj^y=6WgGJQYz za3UIrBal<9>pDT4Ms`g#{Sy?bnRx2iX1jxAXt-NV&DgA-)yCi1UExssBaCP^v>C$< z3JT`aX*AtyspCUJU$v%=;W!)8#V#`u;$Xrxq`!Ar-}DXQtd=8>mO_(nB;zTU(ag<6 z{HWepZHiS@gXY`QJ&(}6()}&}06!@q2e}y~=a^|h0F?xZ)A4spiUFr%?ozFbaPHQYWqQd}4)K|3-3 z1v%C?_>v^4PIEd@OLs25dAPC%P0N3%p+JrnME(USwWd$zd9J})p_vGYEIdoC!@5*V ziNfxfkkpUL6h4Z6)lY;!Tha--iM?DUe|{t_JmwrV*YJ$b4x+HWuJ&ykpRA!6MH+DJ zJd8l^EDe?UeA7g=C;VhXjxH7>H+_fxbloO;j!O{c4a@sIT;WPTun@ z_komxWCPFCe(Smc?j^t0Y6Q+It;l+&+vj#o6zQ~C);!7ZOvS<**{nbXha>P~$r#XU z85fSVzaXR5RNjl-J;dYSPP;jRAlBe=cG5_ZerQkB{#}wu{%E#+VsEboKD)UQo!qDZQ(= z?9B_?)N-zLa{3TBQXSK4QNjQs0}!f6E2L=a;vnUm^8`@Vx0he1CA(l0uGz4jPL6s5 zU-6Zkao$aS^6m!6A^jm)S)OpPcncYElv@Db4&+=<$K}DcOHvJ^bR{?o^KJUMGOL?s zH}=))!d>JOOIg=n%Di@C9gK&2V0TU=@ZFYGSgp2=bKvy=S40mqIC~(RuS{|WtcX@$ z#=|O(?>zIlUi@C&Gsrid^+}S<;<=;c4g7J=JU*ra-7z6;$u)LNF(I#Py9}Z0wqd+6 z9a4rKD3oOQ*=9YEaj)R^)bi>#aETWo6jQ^$7U^;Fr^Uk$24 zo}M8(=lOBWRTwb68g^L?#{{4>?o2&t+=4ifx(xB&mro=2_(yoGYvyA<-H_LC<>-o? 
z_tJW^hpQ}dhSaLHi(PS4WI_$^jha@qj9Fs|(&v12i<>n;L1u%T)m_8CgNFh>lD&B| z^U5_2Nj9_b-*nIYzao$Si+$d?uc-pk9TVYd(#I=uSas}mv$c! zu_xP`Uq3F;JgadqjE`k|`UQfgFTbY@QoDJ|)bIG)3wX*@{M`?F7*iwgc5;zq(@{SM z*L5zZEnF)%G>6kyE>VVLWrm@G^GeX1EP@}BE98##BQk)DGx1XyGd%Y=ocWz8eoFfM zTGd0r&9CqE%EX;I=WFLd;#r<1R7tpMHz0`E?JM`L)kVsaUv{iJ%nM57Tp7w8%zA%E?pYu_z%S<(&A>mqTe}`)tJP|k4 zuh^tQ@9#V;Zkc@R>2Vd6+PY9LhrkUFu1EJ{iC-o@m|w>01iRTZ@xa>)i>@+ zCslLmiE+4>I8egE71AOL35?htcXhut?(NHchiPOS{=H${_m`VOI0eSpV3=Q3Rp&s`xW8L@$F3{dp>GP92EqUj$z3y-A`C{qG>P+H69JFeyy`>*s)6%9n z4AP2S%*X7GogS6wcTJx9S^OE0FZlH7+m-9kcPkQps12y;)Q?89+cfW)A6_f^VUqAj zVN=i_^*t4Jt2uKlKfjLIm*NT?@AKVD&~d4BYOnn@6$oW6=ka;+?3ipo?o4=+HTRl+ zqbe6gPQojq;19Da*G7a&@6hEWdvla@i7;jq%{`)45!jOW=ewiBf883c;s0wzQ2nd; zHiI(k*jDG5S*ONBF$4cxN8M7^yGb#+0WQ0z>0^czam^b3OA1Wha$3yWmeE3ysaxoF z5zh}2SAA__Np0_ZSi2v7w-WU>Os%{UKXU=@nrFnGP62WA?n3QfP~KS+JVD^zUyzUE zT+@TC`}-DMg1Rd3UR>5m20j8iy3C&zj!XzIu5@6Sd3gA?Xwd6TcGxC+K+1la3oA!J zMTlWayQ}e7L+DAsWl!2llzkZ-L~ifm?hL&0iS`(4?Xtw!$-!?;_zZr?2&dunDM znKFJ((xn$CkNaf%h+^!8QTID2iX#444op2~OyH{-`wOZ&x)9M*KZ;Yf7;CK&sVFyH zBX9J#Ab)ZtsJL1>F;u;fgG}~h0*XvXIL@MSkX=h#6r;9?GqMOE>#4^{7rs6@44;zk zs2|QvQTO_6VtUWWDY3}|O>*STg!{OFI*t5>qpxxgdpQ-A4{>4f`^Bwi8KZRZ7gW_8 zddM}?ynj1j^tQ{jlQ_sp4c@*4KhtT+#Vv^)%*g&`%X~h|!XjgP%eUgmyA4fLQjw4A ztS9G7CY~UL)aZjt@W}_PQDn{-dnf$)h>3|n&dDeYY8;L+^LZ1)cl}36?}J&j^L#kn za|)>enEOc+78NNaMh}E9l>6~H$dO|b8+UJTu=g}^Jd($kwZu4?%7ib8C*jbJXA0f- z0sJVuFRFAqpqj= z=o`3bBc~bT9%XqGr`1xl0B)ABt%#G;bye7hKQ5L2Y~wYYra>I!(=qQ^yQBZx6~~j@ zQRUs7!u%dK2qb-s$e_iQf< z-?#yyb1`*%#7&%K#*+5+O*Iu9(_X4KYg{vCt!*hW#f=B`r9=*?f)VVG7VqOpp{+du zz4J2gzvvh(b8}mv8UDrb^q1_1Z^;RPLY=~`juw{N?Py5k!jgSt?r^i3)Q^f#J1JVJ zTFo>5;61WWfg>WAh~G-e3WJ|aj9JsJnUl?KRLX1LfNM8ns~2Wq4DEG|{U}5eV5G^| z$C6^F!{nGB-eqeR)a|Sc`ONxDNG{e6keYyFtGAeY5sPA^Gsegg!Ssc$$r=q#Uyje$ zHz|S4@S4eEc{eTxvQdZs;{EyGeeWJ{K+*P(#zc}ZlaFy%J8P1w4N&hXK#7T;gS)vp zH380^Ee|n-!iZhqfPj^|lHwnu10Ktdh|LVdhN}eycglV{$x~gex&ugZerf0F}LBm;!YCZ>o1H)V@J+hAS5T8 zYDvvug5Sz6&W>x|76{Hr-ENjYugep$Z}lI}aOZ&v_20MF%dwKNTkau4SD)t#S?QPC 
zv-Tm+0@sQ5Gf?bsy4odmAP?YJbsd4*Qoc~x*r8P9z)^8yAOGXQvA!SXW16ziK1X}} zzR^PC<-wZs5PbS+mF}$`iZ#%^S&0yd0q2tC5tK;)o6y`B28^)z>*YQ!`K+_ADUKIP zj;3!Mv?B=0C-61;mB%SuPk~NZyE8NK<5Msr^STLnnHw@h)C^5eOybaI!G#JDg;`DjEL@xtf}wqubOG-XL!*_KqfD`X9^Dsf0fTl#*mpN z3UTs{1n%8Y{CpOC(mj+O5Z4sE_OtG}Hup!L{umoV9|qABiqbe~y1ooA$|(&alX-y`2gaj2;-x86#>fHF2Um8Um}CxB-l(x`JQ2&(wP8rOGJD zWp6m{?D*0VBQ&4o5J4nl5$_h8&(22${&APoGfzyCuS}sReS7XLIhQmgy&=1Al-}*6 z=``|t!NWEFU?JiP*e}=C?N+@^qI#P6f#ooao@}xYP`$pLL|4u@`bh8Wxif2I?f)J+ z2x!DAl+Y@H8&NDCy|*r^V%`h{hCU=XvAY`zWXvwh`!0834Z>6SGozi#COcn-?a^H0 zz4!aMnKL~yv9$L{#km8{kDO5F0(#+tM?83y$~ zToXknYb+O27sC7Xz^FI>3CK1l1*=*c5qFQP33n@5?v~mY6n5H87YI>UC1oa|oHK#^ zTap_Ej8X4GA|1el^9~^#v|LEFLw7-38WRbSGcj{;>cX559V&MMTa(YnGz`ve<}nxkEfDH8>t@@uTM??QRx*+-0MCGog{y|xcwZuC5brW z26R3O?$854)3>8ULA5MGT4nMc;ZE76B{^1J=G?Hj*x59&lxc=PNxszCP1NV(OV(l` zHUl9C zO(U-$tC#CP-NT85Fh$t~XQJfr5%mJCN?}F*I_2K0vF6M02Y^#9|03XD+uYjczL(yx zm?DHr_&LpbwvRaaTq=u~mE3&lVVVnXmF>jb*U6VITd>(;_ZIXs#U>1e{1>euz?nov z_-&j~9Ne8bJJ%86f@JpSczy9im+X_IqS(CpHdH@PtBcy zREw(`ka9m-rh_RWtcb4*g7BW`5AZ|J1s4TCz+Q{oWlnhFTW!z z!R7?&32zI;v+7oM;fcwKWDAp+C=ok`t{ippiB|%wMN=|dQ)~l5gSGA<2uh$&jnGnR zks=-5=Q(O(b%MX4c0&G+tE-219W=>g33eJXb@<{bSBii4ko5G|Gk>PsWt1885?dX)|5}v4ox}Oiw(^n1ZRs;Ll=hLtV z%w(LrGGfX9l5SKDdo+=(X0yZ*;~^inY9acM2>4SXZK8Mr()RP`CpNUHm*l`}P8)!o z-UD4^-FIA}#2eaG|8zcd2}n_#KCvL$C$8HdQd-xiGn*Q^oL1{nZ0&u0ecbJ@u=s?T zuE8({NuDK4n%~*189@?5Gys!DUWGOKrQf&s;~LNg&PY)_WTtH%?99;$-Y^>OnOLK? zcq(Tiwrij2>$+_3D<8xg^&KKykz1FxbQ*qy2ydaoevNyDBQqcOf-qYw%x|R2_AN~# zdt!d{W9JmUOrTxpA~&o*_OYuMdRGbEoIA|~2u=@$!C)&4oSAwVK^k}K=prT*NS`5M z@?myky!GhcBD9TlZ;3IhpQcDVQZ2ft2#y$9BU2npT&2iSQvwq~+GwIy$em-O?&?y2 zp@mdg4(bDih_E4+DqAK|NI6Y?rq6-T?jk7~bSBZZz_RBBX6~%qwxpaqVv__kGp1%! 
zo0Esey^56{{3B{t3;`s&;S+hPVZ7AgHP7XnAoat2FhLdD9pVk9*f?3J9gizxpGC*# zV+2Q~5VU_>9sNb!ULWM8kEj`=eP3VW_mNX7;w5eQhsff@*I)w3I*7Mbhv2#x?Wp8G zORVK(@W)GBn!2QpDl6N3X$%|EZ#zHIcyOlvVUP{G6W8Ghp{mssa;~J^m~6R%YYmlh zhO%tFF+qN19y`(oTPmN#B4cixz0USJ zj`@_(>+NR7*D0uElIyT=1N6@_*Qehs_G{hU-A%$iZ4hA10f0geLBy8&%~a#`MsBA%CXu^ zcgroQ4B8QGmh-mf3aiWI_~R}0qg_Iq?{s1~D)oh{J#r{GP=JPz*byq4baDI;JU`ey%Zcx%23AP984aI*!OPNUbb1)vj zkV%&T{L6c5%&-NWgMPLI;}r7o%01=%4PDCg0ycsD+a_L!cDapi2o8#4fgGIzurx=d zQBD@BlrXptE_wxB9}8UZ)h0-^By6k{4aGo%ma2rxn5jS~(=z!u`{@HClKmmIXw{Ac&5CS#uc8{AZ)m5wee;w!XWu~Ma zC2<}{2fkG328vF%Td`T;!UVRZA9Bj0goP^ON8gp#6;IK!uTQ0_RGt(*Bs^am`+nqL zHT_uV#PF_GLAykn&M=UoDDcXe;f@J!b=OYD4oZC>MQ5A{<#U7cr+pviJB`J%8(cG@ zfz9OeILr9<`;R^9~tXnSbkFs`ARCy)ukJxJN2%5ZCAJ6Hh#?2r{91Sjmly4=WN`bZn@J3td zY)^|r`lAV!r}qSSy<8Mn<0;P<&-L9zV>jYf&wT3sf_}WvPrSRlhV~DE@s)RfZ|HRajffnQ9;=g9G1E^yMk)>a{7os}kPpxv{9WS3I|^n;T$gtY zcD>%ylRk^pm=ZsDxTpR0xM&eopUxlmXmt1EE1LxJX90fTcBpBSs}1c}cph4N-rDoe zv0$>P&=0j@GnTYg|I2G)D(&hB?oa^9nz&~gx+*Z?-n^dbUM!zprD7Uoc(2)HF#z1n z{O+W2FN-_(YI7Tg>BIN?y|rm??mnh)zIV=GK1sL@B|!`|XW{8(fE3O$OvNja zkU6ao;uS4Gg*^N(f9$;>sXHjTwBz^}boU}b5s)ZQEJDv&fU)v3sukY|U3!cM6T|QT z1;gV`6#YRS{cv?wAr87~<*~ki{YIbwf>Q$G|2a(~BqWr09otpqUb5N9DSaMRZyUbY z%Zq#!&cX3%nKyc8BR5^UX#B3@oJv4D1vm2icCpm|ADw&Z;coa&Tddlzs_oN8%luMd z)I^}!H};K>G_vbnR)^h@-{^UKc*PoE31@gjcrKRSeJ*gCzZt;iqw@@S3N-;g%i)ewI3^DmLpHg%~f9L&e z^~trD8jIIS+8h>8Mp2?hx{vQ=+xZ1u+M;hKi2kR;?ht|t%jT}tf%|W z_qV34^gwXi$^e64Ud%I8;r(qfASp+$qP?98sm;2^8L6HdzU+)WukYVY=DhK&uT^l> z(EIG|PnpJ(veV?c(L?KbgsAh3?%}DEbWpOp0 zJ33Dn?;A|x0Xl5~`5-|PSB($fntTzUdLkB~U1aHWZ{Anio@dU{@GEb;*EE%txW6&4 zOTh#QwGb%aF;?QF0tfB-L#;O^LWE3D&;817FQ0K()*G{FYBbR}NTi)lb0s6Ex_v`M zIO@lv9>e(u3$a;PohJA9#0UR~VC-Ph{?j{)&ELO|6U^QgEHtFZ2(#!)r{j#Dk?0w2 zIgC+VPrI0bS1th@qhypK8T@fV2ZIBPPnsgPs{k~@2d!HBwsLbF9+TU3;uoI(?xf;% zqJ>Q~Y48_G&&Jyya0~RJUgcI@GGFhE5=CLoYSb8*7-EUD9`^$6mQSkoEx)GcWZmMs zF+W4D*{95#_4=6yzOOz61)S!>U6K%V__r2gUV=@-wXU~n$;X$7-6280^Zk^%Q0pEc)^gjy5{v=}?$}4;E)}9Gy9=SUrTUCfQq4~b9tx+O 
z+V5ULu0$?RW%tY%G&3y8#K0j-yBtgJ6>s7~X7Ntilv~o=cN-Ecz8<;G=Xgp0tcG;|>;PX*}_er_m)DwGd zmqwqdG>6MB^+On4bNXiGUy$=30SNg#K4h>xJs+1oshRO)^ks-M(09RkTeUHoIePw1 zx_D8U$wzgNA@2haJ|fing*)q}^tpRy(;rV-pUxPMZ+JBW)bOEbk_e=KYyvIEmrD6n zeJ?K_21ml?`1oVXc7yYr*e&9NmmIth9E#b_Ia@@w;WeCCdr_LjD-ZW<3$@#IDyaG! zUzTsSbqKAL9D^WQnip0uX|1=wNgh<6N@<~mQ+Qyar-Pyn44C$iiaatt`>cg zx;sd`%;xgOGrOi%2>eA2HsV*@TZ~9p!vykz_J)PqJm?QqW7I40qF$xmdYh6YiP2hk z6L!B7NPiu%$vQZ@V%*(@w)F7)a(w9^x2Un#b{2zb+NrXHsq9Sk+%1QOc*)mus~>+E zaLIJF2BTjfs%4_Q{rf7JA6J;dIQMI;n4Kn4orMialxmaABN=VUR}QexeMmL_2W#IQ z)l}E+2?B!BktRq{s#NJnN2Q5?i1a2+N(e{~61sqN0RaUmQbjt1-g}i^B-Dgn0tp~Q z2zlpxv+lj~zRuh`Yu24VvJzO?Cui5Q_w)S9AZdfsDn~0<8EbzMRP8H#0X`Jq^$n8J z-CPv!K+G?GcrKGR`6QRh|IM2l2cS{JRZ}b5cc|iYUE8&O;8Bgg*AcoI)q`iKX`MY+ zaBcVIaIULQ=J{;r5G?X2icYP?c91;}J>R3_hxmBjuyVJpwJK1&AJpQ0XBl#0);^KP zALX^jVtH7gP)Arp>yY&(?uXD5ksy^h;a`f{+E_Jb2MqhZMHbk=D0#d`VrAK-*$dh) z%J;Nt4qTYLV)8mu;<4Lw!7~YoNReEQ+>Bt40hNF>aZfy( z?tCiNe{h}?X{NA6q!o*ThJYm?d&*tEC*98%+hP%jAcIZ#mSTU*67NOODRI&d14byj0l3F-&JM)`Tur5()EN7nQND3S7mDD|;t9=|;BZuhSlG@=q@5 zF#4)dxcSyT1Pl)K+rbUq95LAdHus^AR{+|vSB|k3^r`{u5LmkVLe=Ym;Fh`h50aU< zsaG9o4hh`0x$nW?OaY@N^#5gkFiw1+m|pEU)`nIjh?vWF$?a znLI9NF3GrQdgd>_E&jcq@H8r}1C+(rW!Cp2Vm=J&1(`C--_5Z@XMf+iK<7WKC5FGF zBW4ZN@OYQ*JN3!cI(oIfJgR%d8gOsNa`Z8QkWXL*L3cOky+ipR@eH9!@`N9E-k{N(%k6+A64n(*C1Hj#gUS`*WeL*`Kfi?kt>#m zW-tjR^rVHAj!xgu6B1~5ww1kZ2%BydMxPtax)rs~3NN`dyEQo($!l?{AZ5KXqd#?I z%ZR2Ct37JD?m!xW4fHA8v8Klfd%0f3cXWR$jM+NsMN!VA@%5qHrNT=ZTa&h(hJS1x zeG)J173;MZ$Hi{S_%Xtxq7gH=hwE%!y^|Uhdmk(zHN*Qx?YCktJFB#aS-8*I?HH!{ zR@v%j<&&0g+?*jiOFvk+RI`8vMVY68udixG_RGu1nrCWN7DQfgC`XG745$TAHhkcpcji*4bRaSFL zpW79qX~k7Av3&F4C}b=WO`I zGea`L6z3mXV`>;~+|6PgUAHe-uDjajZii$IAg8=r2NoT;Xl$JZ&#mj%QOrlK8_pR` zD1h-*14IOV#Ime`b|@v9OvDy|A6CLxha%>3XDhj?>(T0=Je6m?D3F7Mxf9^;lv+3P zB)3zC&&P?}a3r*l;&;xxd!kIk{_w9VX?bht-!>r$m zdi@@3yd&Zc(G}cCy#4qalnrye#SU`|SsW#~E15&J2*NmBT%EtKBbkfKaiLr2vGYS( z3mlZx-=vSEn&uY(1fDQIibJJ1=$D~S+R@&X<*QJY+F3h{1jNd@pnCeW3k(*|M9|Vk 
zCU>hO31t>Kzf`Z>3h87o1$2_5u<@8%TBb(|gw761UlEJSG|rxT8RcnNuKV}+GPr4$ zt(Jt;VimlYnZh@l1$obBtmBm#wlTp`P-V~>HNmQO`n1b4q*qVfR7P$_~=T zr}aUKbB6gli%&7-l(*JJd$K?;utby%1LaJem4#4+8DaNd!h&W~ESkJ_c@cG)sHAZg z3I7-MQ{Oi_f65vY6Yc6TLf_5?-43?tTBj!1yB6@1r?GVgD*xToa`)|9jV;M??)ew+ zF(cU|d&f&zi@|S&OCb>Vh}DLHkaljIS}i)Dygte2yO!8gWlrx~wO$$h(!5@F*~3xv z(o=iO`f;Yu=Z(KP0H1=1W>0KK9|slAzYs)?N$Gk^l`7nu%?-nkWzTCQj~D|c4elF_ z@%d5X0g#IIMYpg=G~d-VK4~Naq@5s6Y`}U4k<$uqRl{Umm;#yFJHV|#O6%}W{L|NO zGQ5+7BpApx%TG4p??i8de}qt1X`spxTHd0!tRLs^$9sY3&)L~wIqEaQK#+_pnzB_a z)7iexWTCLBDTZDXuC8Pcj%%OX1K6;QGJZMf;i^6HuJ>Q>RS|kMj;d=B4lRlC>U8whXl1 zn5ZeW@uRx_hCs3Z|Bz|<-%#^^|4tKi%!*_aHgL@xr}^1N2f`oQ1;O`E1fvCl@%Iw3 zR@nerCs#Tg2*4Ns(LaU(aq0E0{avl93y$IT^gT)WAV+`+y$FEvwsB0q*}PZT6-+7g zxZ!$H`O)g88?B%C*$Y1QIgDe9t(vQ4^i3e1tnTe1cV~ExLw8%`rGFh+1!HdYMD@1c z2I*7I9I^Gvhz&S#y?&jb(j$1EY(Hr@W$?UGX11LLB%@5xYUk*aN2zBpcl5fsx>_xi z@ka(jyk+ECjF!3`te*V*q0~FHx4R=bZef~Y{kH6iq3adx;Sol=7w$dDIwVP=9cDa~ zjMRJ*=W$%Zxt+VtP)j=S!?*unLBj?CIbuyAoZ&gioaU@p?}GwZ-Xk)Q+`l5=4UxJf zjOB>UY2n=FG#Yqdr?a@Vkc!bte6zUT&}jw02tg=EN0z^3oI~QbQ5lA#Ocmfh*bjG# z`&`2V=j&VXV>BlN&vk4lC~ulQ;)s26oCVi^i`xgf76yK4y#+}mK+ynhI742{(cyGrfJ|B zYMpBDkV_5CYg}c*>C~VHq7K|pWrx2k?FW2gCQ7B<# zaQFpd@iz@LeES86yfDhXW|jGHJ&C>(t?g{%o%uQT$?pC4+%W`>rd)mm7uRDj%HhiF zt*RLDlinnkY}&fXv1n(u-ZbA@m%J;u&K3{jMP$0&abveUUx$Tx5FxXMlbcP`K0SEc`-pKE_D6X{}<(v>nxO;x}B)C=XQ>n7Fc zBDUvvM2Hi76hnaMKxkk|@ezumcpN~U{$HODoxX1cj$VCHPQ4?8<3Az*BE^M3USc&= z;~#%n{-F0#LL4A)-~PCgI+h3@A^fka9y;ZJKBsn|1|q{f!QF{y>Z_Y?t*@~3!QBb} z{+VHEQbXd9eT-u`fmbvvh$swFq<3T2Rx`_9{ZdxmR8t@*HzfEb>RVn4`0zzc!f=-w z{R@8k!Z5M8>p@)!gvu5yC0l#SCr4m_1-=LX;7c3x=`$K*g52`|B2YS8J-_O}#_0V^ zxFjdQ9Y^MlzgPLfHeK`+&7$N%`F4R*0NGN*$+gx>D-3l43NNvr5fo9p=wikl#9G_< zWJc5;79*e5*i2>T5N3H`XgA3HisVRPJ~hj%gk2Wn6_uLNzh$Z3xhP1<|5q zvvHS1_p{4^`1pkkhqxqx2P*gEA>c)GWl>`U^)2b*b+E1|6C}rCN-8tZCU**Ea^3}2 z{9tqD&vXNyv{gBJV6v9nsMA>K-dr`h=vDuwxfeO5|qALu4B=#i{T=9beSh%5FMLI)IF9N`YlVqr^cN^J~lM0agk5_p@2YK!o@$X-}hq9daZXU^aGL z_(l~%pe}xX8G7AU+FgkrM6K07z+UbiTvAsp0@sgYU=FiXbKZ}X;DD8 
z@y-QrT^>#tL!XiG8L3QT4pnb^(q2Dj@G@;mK2|W@-k%PS6-u}l{a!}Aed4n_g2P{5 zg(Fjm9;4L27Y-F%=fmj5a%Ugw$oWkbOychfe>y3OHdTfd&aq5>B$ZA5*pIzK^Rr@l zT_cuUY7P8c0fhkgE#B-5x1OMl83v;TV#MUx3kvGvrm0zJm6n!(1`JXj0ZnV(C%szR;!V!r=oQ%#| zFkCdPK7?~Tmkt>9`=h>t54-TdoTx*o`!F&-?P#T?6zOF5{G8n9^qV(-?R3Vy`n6ii z)VqHaV@-*YBJVSaFEBb&6X6ktPI2GZ(()`myv=;1I;Nj=am0w!E0@@g|+sS7Jzfc@aesJgb)AW&L!5@E>|j5I>Ym#eRlG z`%T!SfpxP?dP9y=6?*o-w;^4SE$;YxX%kVT8n|hj;MHk`V^1*5s^A?Xa@O9nL}Q~?pAL}roTY~Q4V)ja9O{48{JfVs4xS#4^FXpmj7*`tb#AHD%~psWnK`CzHDw^l zY&6&J#G{K9bq@%~G>AZ#de|Pr$wRe-@B)qR&Q+l4)yxI^1Zn(efJYszHoZ7`+@0#5 z#@OcPq|>LyW2@KkQZ7mY*zt90@V1uxSdUc1h8I}apE3yb=0;qrE+qzLP> zYCQ`sY?`T<(h&)O2r(MMVt7c_vrK+}O;{>;5!#9|Z)dqGR4_)C8!f6-PkD!yS?<%C zJofkIjU*&)cAYP|ALRiJWn3;OdDV%_;wRcQ6Wm;lQikbPA`@fZ>b$GpcU90Sa86gU zoE-^_{4ui`-tc&tGukIC}rIr~}-;t4h&a|yY zDsZh~__}BlLVN7syL|@ZRBlJJz^S7&R;(Wp!w@Zi&4Go z*U)`eEBWX)Z)IBNIe7h7FGMBX{;h>23)6+CrP~WXC_kQ^6HYy`r6>v0v=-C&6tGGZ zG;3yf6oPX{CbHItBLb|+)ZCT6Nw&S%ey;74u^>Vxoop`e;~X=>@*a7jyKOjzWNc@c z31YIuCY}M&sgecfITqfeHTe$u`s{f;dvkmyiplT!IK#)?0E&Po_gNkUINPar5;x0f z9I^w!u{G+b;99lv`95X<*ndmw`*Keu)=vffMXus=a0w&~^adbo&+rD6{CuB!^Sh9$ zNra&?KWQBnbOwaT))98tdBj;M`cfN%Gf;CZahdm!^li8VNLC*y=#!BQ&P+j5tgf#l z+!n^5;Hy^P7s6KrFT?mrkMKW#xS7_lE_q zm1+FI%VF(`_u84Ra`3kx;{$KEYSa8nh>Pf4S(dIQQZ-Vz|cMq`1^!aYP#Q#m^`<_no+wI!v$fW)VcQr<~gs)`lr z0E~q~ZGQ`gt8iHFE5kOABA}|y&d5Xpmv-u!ma3MP`q-DvF((FX0p8S{4Y28;pKJ`K zk)4%{0NjRa?+ucYw>0$$l3D{S0PTcaKn4t?eBr#FGO#c@6MOg4Y~1{6uDFFiib?X zVjlU&roDKYYKHilV_J8WjpHk@hQwNgt26?+xCj(ZK6h`vK&{k8VsEa2DYBa@{7pS( zfJQ;nMusXh_2#{hpfV^YHj$}Ml$5uQ4Jc7Zda@CwfevbyDrImO7#4wks`)AK^ds|l zP~D>Ou(TD`tfX}q+%Hl|606iQt_mqi?vHIv`2Y4!2uw-6x>;c>_%U3t#g7vvk?ZYBqprFhMtBi3I2Xi$wd*ye zR4PR*3%>aLMbo~eCc#ML)+<{Rc3)i6DY9WFmtq{NE*UI1f*-Cq#;qzlE=;pKrf(kg zE1iv}EBa!lYNV`*hqeYpqukfgt%ZyaH)^UvM|VPmp7zb4!#N2jSoXf)1 zk{2(}xanj8eZS(Kj++A|UEhi$VW~61V>Zo%Iq$y&y z&oVRJj=m&ojlTeu=7osiq}EWRU1`Z?J1inOdFGmnD{OMJoa%0$?-CHt_0d27`&0V5 zTRx(V&)B@LeOl;<@t6?gDZ*)df*^Sxf2`y{@(HB?9hfUMfpGLR8&)oq!PumyR!7oA 
z1;{lgeYF}!EePYJXP4wrw|C9GpOY$=V8y!wUU)AC=s@Ao3zE_omJy{PkpLG`VpG8( zgO4E2;r8eBiIhDMH&nWA4wcMtn^l?BU;KCdfYG$@V1sNI|1~?l`Ep6Q@F>PniKHxK^&^edYM_l5HXGDc7Y!4Yc3`m7(SV$gW38 zU`J!TOQWhrN(&$UuMPN&2z+HLg)rU70#y>9RarB_> zAP!7nWt0y!g<{=6s?XUpUtH^ha*%>3o&OUy11Ihlj3AJYZTYQ_##?(t|LS8nG44%V zq=E4DGRk=_Km^saE2+$|5qb$YFRFO%Jbyk_V)kcD`ntoML-rplk8Tl%XN+PUEa2vzf;oTMMxc9j^e(HYWNd`4(v#GaW0E&i{QxIV2Ji&hhu)o0@G4(2 z#adr-Yk|w0FWOFh0SU3@cM#*+G>uLQi*M@v+@@UKB=#@-6RCq{)7llobrL(XX=Z{r ztu1$an@5ka!7r!Hx_ie?)T@3d%!}1;Ca)LTpL-ookhV-(l&=)8JD^|67-h3f z6k?ngw|0$L4kk@XmLnH^>)j5-21kh$$*k%&;Y|IavG}`~;ur-nFnY1;elTLS=Bh#q z>I1nK3{$t)6N=|d33)hp=M)H8Ei5mtL& zi}ElDeDB+0GDbt16rB>Hq7;+Y9$bA@{L4I%a)cQTt}j_%@X>-@$#=sUtLCp=$lAu3 zRy;XbRI!U^v@9%$f2nDg=)23rXc`H2hDBM~M_w=d)YPqK9 z-oxU`akIj$LMjg}$)vof1cm{Qwz9!_&P-w>%+d#jc}ZE5Q+h(`ej(JwJC$r`J`|xy z1G)~_+OaWG$_={g7c#^A54W8peAAK@yR8-$w5e0Yg2kRWx~QAnn(!9ISXr`zO5JI& zo|ifRNbX%28(p7pVh$V5tLj+9E-CCGHYFV$Y(dp-*|HS_KPS%Ga5y zQuJ%56zuX`R@lOj{MkG?SG)m?xAh*1(^_nPd_%QHxc||31kQvqU0)7jBl(wH#yeM5 zUH|#h z#AVvqxR#p5a!BBl{eobB&Vq`v=hx2?ojh+7KuA7RNfGX0&HTI}TB$nUrCx?luR6S! 
zr9R09RATwtV`Q^@e38vYmHrz_=&Qb;kiUf7lTHpzeJedQ(ODWg3U#~4WnPK(Gx+ed zrJ=5A`t=O#vL%9GO($0V^URacJ6w#c^PKXm#KsxG?g>@N@gmSpr9BysWP5}ka+_0} zR`c4*V-k9dOBiG&2-;Ix1|TlRaQ7&v6hq-kd69;^o?=&|V6T1DkIx&lC4+;{5pStJ z650)7R&jtCF3<)9#sAV-@@JO^0-Q%w0N9rv;5PZz2O|U~m3PE!fLxRo9*|Qp%nP2d z?E&!aDL5x0)<25Zfw0I z_^z%CDWmTz_ctX*v)`MlzzJ`3l`QB8uKtWjkbn2u?yM0)!0trUvd#XhSG5cRXzbS& zOmk0`yOxRSWRSP`+b31Ch}jycu~`?%8+%yDypvj%(VJB;r<}`o28*7JD#`=!Gw~v(da}0Z;kDb>|5KGYSd-k`RpV^0zz9rZ)=!N6bD533qXppJk z`}jdE3Y&4Y86-iv()tQJRWmwK;&G)_f8+xcmI=6Qg;$@%&XXn*`FT)>#|d z17R~FUUbVN>W?IxUzi?l+&t5x*n_J_A$|zzrdv#JrH#!A`hBqEz4M)fudlgYoj_%Kse4%73qeBw_p?g-d`KVT0<-sZEJi-mG9KH80Ri+xDxXAh< z!CUz4dV=;lBZIS{<}gSudGOu>1>ppqGpFKiu@yw6MwqL0(1qufLy#*|`ipxw~f@u{5 z>iy>CpH*3jfHlU|DuUt0XZVt;yDg3yxVAIp(D$DXfQdC|u+re^SI z{o6PhcenFN-qYuuAzJD&& z;R*Es{yNC<@7r0w`$Ot{A^?DsFiI8G1{{raSq^g{B{+kLOl4K z^=$4-!Jh89PK717rtZQ|QK0dn?hU3#OT=(LP{}bmXr7&+21DJWBXD*R@X?Z~H$(a> zh5j?U7_FdH;#b3@tA=#7Bz&x4_2Q+8>EMl;W^CreM5KEhR00j3nzv3E81OYpW5Tm6 zezP>!+0^(ZoxUl+NEdD9Y^sk;kllVQPI5(^*Isaxyl2VYKP>Cot!Y}IfAiZjF+U-(A-n9NdSMh27ADIXe>akcEUEviAw2~F!43gR zg@XWqEtb`}-&%s44Xhur?Rko-?@c)mBP=Zn7SVQp;w_K4wb#b1d*OUstn?cSmFa${{^hqqxLCg{(n-G~5%{HCJW zx=TS*-)Wc7-UM(y6#==^eQgY1&v;Cb2s*OF=WRx0aX|4;Z1Ra^`%KVkUf&(D;XG$v zFK@ya)T^Y0UL8T0y_eQd2tx+YrqWAj|BPo7@BOU769zRNW+!Qna2MTlFMD6OH1 z7PpdfwC+_WgVEIuAW-^LlCoMl+`Xq+6$D`WLyT=z$0_ARyNKQ*V+E>$Qr6I5G#$ z530r=%+MVBT7TfX&El@%hDMz2p6 zHcUd@p#5xpbXZUk;aU@RWH3sWvHZS~KHBg>3qE1m4l7*bEP#PP# z+4nHov|{}hlu*Ng3-$LRrY6MITzb!d&=rqgs#pt2fzHO8%Y7B%v!A0?HR zgAYk=j2mr8rv!1vNbyLggI6OvXbunC85VJ37(lp$3Z%%@-G6X zl|A3INsC3PqVJST*6pvry;Glvo4S(7)-cx2!1{IvcNg1%-fcBRf}i21nqMb6%ftdA zALvoDxiF!_b!YqwiVsplrgEFSx5tOo{OLQcw>r3E9ANY4tiybyj#sZS-3U@U_6&`k z>t&)_8uAnoW-)Jp&^}di?R}J{w zPB7G>k&AjfT+eLpD^;RKK(GH_zZkl^a(A0MdI6<&#j_mH+o7Uegc*{-Ge2;ih%4r> zbfiv8!77LT7jTADe>Le4bt&F6CRbL=b^S!7y1_On?(GCAKxid1j_TMUDFRMc+`$M9 z++8bnU>C%g32f7ofOLW9N^qa`?^ARXp4c)NL+=k5P6uyKcXMEAeSit{oD|r~|36*e zHeTR^#)86K;Or)2jZ-7=VhN5uwS4W)Yz|l~pbySj?8Pm1*Ln-I@3>+H9SG>iAVUOp 
z0=v@>D<4hZ#B%rEN=0mBT>q48VLl)s0OuuiAWq2s)#<__6U(6k3TTt)yy0L+2y+N% z`Dk)4@Lj*|B#h?Uo3|LV3n30V$z>7vB7obr_odCH5i&M@7ApVhU{{5$;m=BQpd%mw z_z*L9{51_c!^j_%*5Ae%0tZq(vfwqfc>sZ`!lw7<1-v+Darg5~A=cv!Q$nw@ZlufpDE!4#n&Q?QrO5bM}FEp%8cHxkZ0w{xHpc84M<<4ADmvqfJ1_#zy_>cN%j^Iy5f=o{h$z%2U8mKyOU^|%tyfj7Fk*{|weI)^ zQNL8XnU9jOAe_eX69KLK_sittX)39MC&&R(aHl39i`nF^jI6|6Kpq2;0}%j71!zab zbe!xWsBvV?0PK^WsN|pJD?t71EK`y{9(SiK?D`tj#=FpPM=sNbhz(_{{LuKL5l!GT z?ebv1ZaeD+^ub&Avr+R!-3;GMzjTo$(&u8?trMgb#zbh~E}8)}a#T@c)0B`KU&@Oa z@)6-039(E2Wvx5&fmW((yX|)?(tRak-6`5LuZq9C26}>#XVjb(e*-(8v{PmHsZPwDJu9j@S z>~=ycM6d?{4uANMMaQ3BjaC##6D_O{EI+ESO!oB;Q|=oo2`HJ?#X{#;@2Sbs?&fw zZaYH&o6_8m74q5F4H3_)7CDGR4T1R7--n8z(v%)^M{frO(4X4ZqZv9DDxvv>+7j(o z)mNy!>;Ia9Z$+8_CncT7__hrIaj4^oCqcV~iiSazj`EyGe-xW1>y-KPjaa=k)~^U2 zWO_cXSNlH?qbD3BInk(`_7&7ePt#N#4o}O^W`tLLIzc;PNAoFU| z-XO4)_cFN}lkq&pG^(lE_z7jQxeJQ#@FFJak5#K0XW+hGdi6l0&2i1WsvwpV)H3pE z)*^6<`CHd%E~(QAe09%^q;-X9;)P)8{<7S$393uU^_q3Y-G;*)!6t4;wp!pZ{^#KJ z_n5%Fm^WjImq1*MJCW}u6{3by*|gQa`j6>q?x$G(*K5;%uLVMF@=csM7{Y#r!7-k}-8zg(^f^BV2Jyqm#>D>XKyF7&qh9Ptg+24% zPpVwVoESU<+Nhv8sP=nc!a+X|?Nzp^(hE&efdXHtYD5l$bCB3n`isfK_67T~L7wA< zhivN1L>I=9I4D*hS;>N35q_*bgZXPhr5CCz^rz3MX!6*E&4{EpBYd&w_LycwdT zgT6fLMHeTuN-#DmD6R(t$d_zOi=_O1dhPNp)#_LU();XPI-w{budggS6%cDt4CDI`;jyfhJ|z?##Zv)Y&onS7&G6c zypU68S^qhI(W!Yr_;pO0S9pIsiHP;4)p_x%Aw|fynh1Wcf)!u9f3Oslqd-JEf8|2nm6S||fvc^g7 zj*LF^NfYcfJI9=No3Ur`X-&l8@x9jyerXPS7w6!&c|jUju=g> zomd(osA9iklgE&&hS!8`zNV|$1%9)>qsCUhgYb=6k$7KDHJ37W{354^%$q!i(ahY1 zr4V(N?~~;? 
zT7hG8T;m|ISX%s9@$Ez=zG8Z$&Oe*qOpQege#~5WSJmdS_;=&&EhWyNl~u|h)+Ou| z>y?JytOI(klqzk`Ouem`b!eo!k~@qw_Psh+Y6hdB@aMIhC&p=9Y=y-e+WQZf; zE=D;Te0XqrseJ3V73b`Wxjte0n7QGV;~$5UOOJLwtdt$tM?Ygt7$!KYT*x^aLZ72J zN?cg($hn#^MZ*UDRu(jnY=Q<6U&uzTKBL^Mw2;;hNwYSE{CT|Eo+j!}v;z+yFw_F$ z7p7bbD4b}Vwn@CW49;m8zqDHs85eb0tUdTVw%#(jBYko#dQge!9_i|!Z}kThxCX>` zjOxvx9z%A>WnUFn$FN;wqrY9#+%E|?atpJ(mZ6)&rjG>Is;6@XI%7v&efh+``(q(DQK$v=uJt z_g}gC1mD=hENXZj|JE;FWS_hT-*z^#eHUffWiFr)yguLCMr|_LDvaoefDd$++x19S z4S3k(nu5lqW+${*luubH#k-NaW`uxyE#r%R^kBr`BKh??wDGz|q5^9PB~!l@@dWPr z#UX4y$T6l>|J##3M<3cA%8}i8u#eX!ze9-SjZC{Eli@}c^0$7dbFf#%s?ybL2_ezr&IXY%iSh$%im)~pe)}$ z{`R1e%`DXeJ?4sxc3!L(`q6q93u&hw-EI(7TQ#Nw$8-J$1ke9D>~1&xI^2LqB$KeO zD!3MGe3$+=q>D>lnOuR@-w_?&EB87H!DjyWDnGNwEWW)Mxp>9yM7i>Pc^6TVEqsR+Lt~lYw?F!Ld`apD zg#pq|Q8U2^yF`-U?gC85S#`suU*=gn)6gYf)@ES7-&un1A^wj00NS_m?yLd_0yMtR z(f8K6rz6se6Mfjilbw-~)ixd_a!^qz!ic=L0vZfieb)G)t^2d;@8}PQLuY=IQe2n7 zaT6pQ-@TiIj$l;{dS&Ovcjn)*^_5sA$?P=+^E^R@MI)^4V2oqDJ4hgoR_KPP z(~@7#MJ*VezSPs|E^=*wGVPR%gyoC7=WiH4HWL9CI=J#FsRu;~{z#Vgo$ z_#9_-t~jiwpVw)YUQBqiUTln%lW0`o{v{ajaKkfB{@T}j7a%O9E$eP$E!ro6;8xjF z7=+4j>-A7)$apInetMNt5JU?(#Tj7D5z5SUQD+7qxMh2RD|A{kX{*1o*8in&+&Yi3 z?2erA)=^mSYwbmjh_i+fLjYmp zZ+!_NJhk+f>92%~s$)gcFv~mb_hu`3+NZ&qRp_%q#VYO6!SY!0Z{ULWgJDJs(&)pVRzGt)k2 zpC;PW;tr4LAfL}azc$e1Ov5UD8%L-lYPD8$HUs6i*Rj@!)s6t;$-EJdS=&Nd!lR zC0{F)JB+$J3R`v$X=iX*5yEm| zs2@+Gq=c!uaH>kxYubr4i$jdX6oa-+F?$=&h&M8SeLlAc`n_(mb*J8mXrY}G>WXgY z0SEWM8B4ts;Bzej?hej&4n#Wl{HK0v2^B!A0eOyqq{dQ`r5>=+tdblyKpneXfD>p5 z!wUz$oEj?uLG1LZ8W28HBcm1eKo!V)p>J+bGJ6E1% zq!xV*kXbN##;`2(%;t^wBa4&-HG%-(xO@kyNIf8^7jaL1p;b#Mkr(EeASqq_Bw2uY z`TX!C$N~ISm*Hcwj0hx{96$foF-8(~?>B(?9oMUy_g-w%P!0G5JIDXLR0sm~kVRrf zJ!`HF^U>ais~=!2{CcQ~ARlqgSm*U}FV%L~EL5!sl4(4bt?k$U~QH32`K@GbweLD#Q=J1e89jZsRt3;A}gBP%Gr>42LTO#nD^xxC!> zuu;PPsdfddhCWu!0V5DlnkCVF?o>&>4DW!Ow6?!=;#8TEOxQHEdv1|NAfhWsv=(Yj zh;8Wapx`(+g6j;}mR><`mD+=dX8=vo=yrceLhy#vZ;NOX>J@f~N|T3F#hr znJ_hc%PFT3CKnSC|6}QmjgZG}%~eQuUkG*6-=*yZmT0Hnd;-Kl>u-rtuGS0xw$jDSE6SpIM^8$fLxqJAI2`w}*N 
zJ?W4A|M62bkn`)!=-p!mZT2)x=h7>St~uDb(+h??ZkD35|L$McJ>32}76W2bGhA$Q zrh>_n{P8Fnhc?tHFE>Y%d_q+TL&;o6w{OfAPZ`xDMekpE<2he1AlU_IVlJ%$=rYx& z{aql@+XztF51q>n1L>~+mhCi}HUWpOFYN)xHUPttb_z))rB#}a;vR6Cr2f*v zO!MA)RFu2l;`au?8C@~Q{XV<2$O*_|J>xl5ck56g87+Ulh9)jKoHLAVIlq_`;}*?? zDzbi$?ZD9A>T;zY4^%O3L0xn-_H0Xww#nGIlzU?P6_fbv52!4>P8MtO@uw>7sUY6p ztM@myKTO8ocz=^7Z0)f7X3;I)a0<7Z3P1`?^?&&d9Xs+XW-MkbdeROJ@B;pCZoE>!>z7OSgz%G<%|e3R|)56Jf3*NgC0HExGuZ)>9o z*bV;@mE!Dy$ybPm+^?S-9rB;OFU-**hOBy4*sWbik9&8)FEp;Yq|4TswY@n)z_CEC z#aQ(HSk_*=8Nu(eGJ_wER@01!Mp>lZwCIlV#m<+7Nj$f0&N$vFGtHaW8MX(MD;a?66 z2%!Dl#X;++zq!hg_mW{lTRWlZ-9ncra2tN`t0=1f%p&_efbjcIA2PM#CqZ$NG#lsF zP~*yyg$`boRXvq%b}z3#1=)PK%Jz&~9poG80;lsZ7lqSmb2g_E2LK}E6rX1d=b7?v zF%DMd%elaD2qOc@Jk#tp<2u_v&dD2}pI;vv6bRo=rfFaj$e}U=?OXrvzGLtTnkwRy zt8zWnEIZ4~#CV(YjZR<%P@SW`>Pq(0Kh^}eM2Qnl&O@B(;L8>C=X~`@Ljnee2|ZPT zR}3vn((9$xrTt2N)J*du&)^X;No@r52C{lEvmdQaI0%gm$ByH72RoYf3?6=KyY&ln z;lt}7y~%##l?jdJ(v{2;i$5R?0KR7bCGOYN!AxkZ$&@H140V0M$S2Mzb~(m@ywqt} z=R#TMwHKekkc_&XE6dI3Q~1EYeA0kQ|9>w6w#HhWPw(M{Lz$^R+P$PD^4+Ci_|?Cz z^WT>FYkD#gq+68{CIt@1ZA)t8nEOp8=};>y+ga9SG3GJe`6@zz#HfD1Tn5J=bB`}q zsggq!eMvm%uG;GN>>e$=!>GMM|4lawg>N48$G?*S)TC^uH~;dFz#14^o-UALR}rXp zINaGewxFz&Di_E`W`OqT|936_1@<6G+<52_LV*ZX#a=sh{ZcghP1tEq#0)Y15ooCHTWQcP`Y?^z*L&i{ zq~Mf*pZScJxm5NdhsPk*E2sHh-$VqB9k}7&Y*x!^$5)#YJ??mVwjXh4t~h zOc$`9sq|4|b8Y2F*QNs5MeR$Ho-Wa&ergbYoL|ZNN3E;md~O2u;~aDT2P8@Q_HVyg zWA~qHTlGIze;!@t-?qzP`KI5wOk-WjhN0ig!RBH8yO0)}F0pfJU!`4g4b`}#|JFk6c7B&AxEtb5HT4#ulkE>(jae`=U9C}X zeIbw}(w!{Imwj%EzWtO%ywQB*fF29io;bC=mTtNzlBkEZJKs-rXmf6n!eNvd7N zIQzd^h;t5><|s4>;oY&2ek-A14MJ6b(VjQ_{xtFdC3{Y_c)I-#ul?Z%ZOWlfmtMC} zD$3n^fq*%jA(XiY4Y*UV?6(Zb2yXTS%lpTrVI@h&S!wm@%6BV9-CkFFLbsXM7L}yO5a$*> z&fRM&W#d2I^^xA6IF^PhtC!m=QoTROwtLn%z5Fe~k=?4!;d=YFV2ck`O35fc)9sXJ zad)Z9_b&2ZD5F?!6a8TwglhE6Cz|wRET;J2MExaHlpeRD%-bCr)#|mKT@HQn_)Z?z zD6PDpZTq6=Rf_`L$(cic`2$h$ln+>{{>qN`sDHy<9ih~ii_vgqxtHzSA)#m+gdbUi&Q#aYmfqM zbEK<*g9g1ywBbFn((l7+Y2hV|9P;uSH$YQd(k3vG-6N0yH|JA0^G 
zqi+>aO#G~6A;C}d*R2^en(5MTlB~MrGnR|y=CeF>*4XNqz$|*iLe~YPv;xn@TFRAV zMAkIVsSK)z9`O<2fXM>$LRnZo?VMPp{eH<7i(1*`QN=j?0i{Bv@PXivf(|EQ?RH}c z>=ps&Ak*hLRV4}sH_|#@-r*f@6xUG?dzzq5{pHK}QeSnjVnv^e6z};@6Dk|v)LMArrr(c=KN}kz&M--?~oMbHK&T5yK^YYF{)XFN%-K2rMD2e&+I(8 zf>uZ8+kRi_BG1)%2l=7Wh55N&7uhWxS7>j`FPkQtmXW6;r(x+FqtS3iaC+m z^ZLrXCP0G>=TUv4b)2#!zI}E}SGVRMQHH`${Xw1$OU4E|egx1!g;{gJ9EdiZzF*4=Nj#4bMCP=r zrWA>ks*T^rFq&u@S2nMa67g>N-Di}3wqGwV#JmX)KbL}86NO`xO>phbVT+gJPSCFA z>RtjJib#gLIZWxw^)f+PS2{W0k3K%gqxUOlKq`T&-3qWeH8ZmgZS4|H;_jRM?Ll&* z57Y6HSvH&>G3#bB|7r|3Vez-Y>q_=z47c`j z`lGf*^62zWtow5J#TZ3pae4_Cbh(`DOfGl5d^B>T6#|M?9u^4#w zog(=1Fa2|knx-hwuQDV{g;~FoxH3oa`mZi(^WxsLh?1~ZF<<)Ks$1R&6l5j&9cS&> z58-p^*ApBLxv!1iKm@+LcH*g_oTM!<$EYokq%V-MWB-5jK*K~PR18eNw7+yH-$Bmc z^Aw_9udRLH34gZ0Tgllzy{pTw{UNA=z^gzn5&AjSyB3XYYI}?DzhltQ(IM}mBT+_B zxTt`E2q(<&=I=n-3Igw{Zo}E(V|BnQZvO!tvB&>?*!D$)BKaIcEIp%U*F2|N0+I{m z$&OaiEs^--?&Ami<;Fa7Nv`zPN1inuz~pEtU#K*97@yU|`FTW{javj z|86%#_7S>1xNB)t{X2TAx}Z6@yG_R6VUWU#F0|8#0z|*>6lh|j4swNK&t-rspbdbe z>YDol`X$TrpI>-Kz%0q=~(YtY>C5Rs@J-g*f+=c zJKN7(LiKOBYQH?a-`=+7p&G&NlwN7EUb1A^-qv*K4YQg1UWF5z!Sm}eD_L&>6&;rm zBFZmum3OypjO|y>Tv(kXe#N%;SPJB5vxpwL1&4!Tax~383-{6rQLwetPgmRcFiI-= z^R4n5dtPb(SOOX%OYWD&jQ!{1J^B07ni%VeP_lzoRsYlBDl4?{)eybbPaTBRST{R2 z4s|j8>cQ}*w>3}wOrY8cx*(so9gLV7XhCxe;keqg5{=mATMmjgLS?xO+OuN*g$3FI#Xb9Kg^e(U>7v|@UJ&5sk* zU2?dS`>;aqOV&qlBizP_rl|P=*>cdU2o{ACd6A zET`vFllK2-_5U~PZ2q?>Qif1bvTi39ZYq8bjmstnqQ~nvCG)jbI~!hCy;r(xT*l3zHH{N4#njnc-r(ZAKF-d&a`d-99hV zjrctpXcgcn`#ky_qzkbbhl;4#lHfZlLF|T0yv3*Ne^#|5Y&tQDBnrSA@4a(+fL!s+WDucxb@RFI|p@gzc%b)sdpqL(gr&nvcgH zAPnU4QIO!9_W9XZr3jrZ=P`B>ioMZ$8KXMM-6qo+DvQ~;kRb6Fnl0Sr#@nr__CH z22C3^iy&7K(y>|Vl`Fn-LBl-Ube3)2ndw+l9Tec^t@;QMwLFvT&!CSv5Jjt6#B^qJ_yjR5Y69gi~xn?1zY z6YRHpSr1M9bAEV^zln&yv$ps2L0$3&fYjexT85diM0mu9IpBF%ysOr+`tJ|g(QbI! 
z8}8AP^dDCzB3nLnKtw2`qFB=(PXt!TbJchq2FMVFvWfDz-QIS8$(}mc1IwrVj|iu} zK#|_vJlpuo#*WCMgL0RRvv>KgFYPGLpT(0*M?n~DV+ua^^3MQ;u^NOv!%r+cLdLur zWk1oV^Q^Tw6GCN4Y`HaKYs|*ULsFQ0 zDT)?E1-*38m>M{d?b}N*3cL_LpUVioCUgUV-QMAgoLE#w0LXw=Pur;iHYB{s$4Pjp&$OD1mAC8N!H55DMO+F)iOif_%o zYX#qy`+XyGFg@m-k1c)fV!*)SwZ?Xme+4P(>xzc|gA+6J5m6nIf8L0A+e7-)L~Ko> zKm)A(oxpSRmae5$yuR1{y{moeHY8GVGZUj=ul?n_e)~T20<$t~ z$X_&{E7nnv5PY8@CB(-&bR*WYP;2%29TSX<0o9=5!A`?67eyzwsjk3VXL+siO#<3& zMgKY}1G~tan5Px+h4@f);=4Wc`VhqsBRwAl6Z#{%7r(?%jZmS$T@R3*lzr9+XN2(+ zi#SZK2%oR&6+3~qAcZhVaOZan*{8jysyzhkIVYByntX@#9I69 ztcChdMpwQtq&#DionFsnI_I2mG)}v7KKse>Hu{#$dRaBZli5mgs`g+(ar_t7`l5fn%qC`< zeH_n}w8q*!UpVBi{7zaXY~ggdm=kQi&ld$c{X$5M_sSzBs}Vs&$tcU@9yxA$sDQ(w zY}~{aY7S!C0-<_(B60ujyCPN{P>^^}@DEjXljY&XyTt0V*qq3hFa1|kBy<&N$@e<% z%$-GE2$5%eg?}z#BdyIenaNtknrP;3FcEd;2ec7N1NFXuT6DXLGhU@R?CrkRfzR&v z;q)N~idMHwx3m$-@Nq%rxtT(uaOBs8h}_Q_9ND>Cw&C4e^8EmVsY##zDy8YL%&EwSd*Xn92F6=5;dF9$m%VXN2rYX6C+~Ep5tj^0xeU z$@~e_fWlNxNtw4QUjJ0cw{fbtCbX;z`nEKnm|yL%crRX@O8~m1v`tC(og1LFxi+Qz zv%v8jHri9Q)l|73E839a9WMc+Yp)f)rrMR=mPBY5O_l{36yt^IN96E-D&!#eF3oKpPx-@$A!JK}4_?3#~N zZ3lhTKBefj4KMwsaY4d%@z!wuJ+ZHA51L~T>B80i^j?JB?%&5Jn}Dp-gz&WGec990 zM8p>P^)A;tzfEhc!ntlu<%YaG z%yL$)tc$Pa&_+gg4Zg?`%;;0O^6RKLYD#xBV@~*unq`tg^BdKa*=};hj1JnfLRo8i zl1Z=YW~5!&;(BVp4U96ej5*{i%`la+IrhsRP$fu&s%-hy=aB$L(#^NBN;hyStj{-G zFFU_rmfC80*Bh)b7j`4^h5mi*O?6DSHRaOtSu+c7&4)SD0|U^!Wc`RDA-&f#?eFBd z^%QduWNR6_+n1}>12~D!k@#;_Z^rihbxy77F_=n;xB266y>I&`Q0Z>YBhTr?ODh&x zSt2V*+Ih6>|3=LA?<8&iPoEW3WB!1c#RKR_;s@z6a|_N-T0TBs&(O<;xVXq3Oo&Q9 zWWH4VPOh--MQrHPcY0)a!LwQKmYj#qG|4FUL?*U z)fbbAtp9i`kD`H;e)62O(!F#|MpqThLzmpM$S~zj^m6uUHcZ+uU6kUpzW%8WDP?Z= zn?vB<9UWdJMj0`;^K0iAU{^st$I}|>5uMQ^JY90B8IKolKs9?X*pjf+>CBIp1Xz6S zHB-sQ1b!9enCyD>S&$d@!X0#PQD;MN;;~C%wFi(&m5Xy|T}YX?2Hrs8G#+sc=-&1- z471Rr-Z6)1$u-@(bYgdwCcAKL`A6>(8wn^_pZy4v$!0-Mm|N*Ua5t+HlK9$}M7wYc z5(-8(k)84aNm{dkuAMw4r}xfEwgXs68e9*o2=K_f>5unEOP~M5w1rlLxBil+-qaEb zrhN0z*fuexkS1ajVM(;>-@ZT8oe0UHCt0q*IW_itB*`5LwNkHj$SUC`*)I?9-G88Y 
zt#n{6T}=S?gUDi9e-l8*gqxN2wgj*&SvX(=N~^s&v~;<#{hF~xR-)pHSRLW^9Q<=U z8NabB*iMAv0cE>yJbF_fE01dqk@QGERmaydV!hwXR6jUp+@*7vSn_re(#qM8a*#{s zyj4{-O z{SwzslC%QMa4>t}L>jB`Y&fMdvEO8L$YH7CX0TShWtg7PJ9 z@UFc2j)4=`P99Ert{Qd3+johhR^i$k-&?{ZTI zDfb!-?cL<7!UkZlOf{%H2lk!d>X}8yHm;5;n{k=UGu!+eTWRrh=G1z6*_YN*zf8zJ z1a|IlYjT3grjSAQbZyt8Liln4z5qMUldI4q0p~?&wFB9-BUlxfA6qW(BlfV zlW}&~cu_2h-CI(3LRp0%O{;_k3r=SmWcc6TnCLcJama}6*)8E3_b;y%k1!yQ?RZDV z^u)w56vU(Mu*BHc(sto64R_|c@^G?75RtR7r_=hW^-}`<0ehaM>?JSw75{| z(VaoA_>e_q>nLj)<(n&%4eY)UtZhe;xUsXseZd&;!ikCJotS02F_Rzw_k&)fB+`fN z^IRDulq~gFGwq7mMr(aL=k0RFi)(5&JA5bKGeG({9hy}-CpcxL%N;!l7T)R4u~(() zU?eH^b5oG)^43u_KoI!Zb`T_#pJz4~f3?@yfwF$SsVItVZ0%C;23B`V+~V1a-Jx#lMs#R0v! z9m-1^23qIh-`rzwB<<rl<(zHc6>KaIv;2E zPw5Xe-n=(e8f*+)gSXeexu1-fK~=wsh!@z4L%%1cKDG?luP8f_qDmw0@c9;S$r0Y* z+_gku1-nm*#4V^k9(&o7u+bK}Pku8l(jRhY&%JYwXK~#tzwFFa&%luozN(HU#zo@c z;Q%@@zQWrXKx2fs?l^~FxeKc{OeT~T%#WTpUM>DyK=`!tD}iNnz^lGINq2*6KOU`$ zUFu$9K=79dh2u{=85cF!=en>{BZz2cfhe=Py$Qx*d*-^-xL0ai0j9C30!Ok*Ql^Bf91S-suQuAZYAIBI=1uV-N>GJZRj$w>3!)s zGdC|$uqL{e1UYAz`|SCJ{wgF%_g25kWS^*t+?rr2KX>s(ilWqk2W2rAJ$QKvR6!jQ zi>J1PoZ<>cMXFPNuO{{^?As>4ow!H0@ANMB>m{(9Y`9EX~-!GRkHJAlNX zJXnq}$>^{y)@){!(++iWxeoC-w4V)$DDPGhMf+;^_fwmE@7Ut0S|}1C8PfRqq9nuI zx_kr700pdcorxl5mKvkA+$U4W)~80sznC|KJjv*jk##5{OHT(AHxJ=8Hz#CJMw}1N zmJ76w)cWJ#G;;$Uv(N{%hi|JPHW2CRKcEy>8=uGb`NolPB?k1Bd}p8b#m({0z$}2P zg#oEx4r@6jpGHxAdPm1PExXp{v|Y1Hhw3tysztTR_Xfj;AJgB!FOfuqqH(I%EJy)+ zT`U=TbtTsQ4wW;cbC;BWHVhb~lQjJ0(qvw$DuXT+?>D{Su^=_#F#oCFSCyVu5^1Uk1crL5gkkSrhY=ObKz{JZKv3K3~AJ%A`-zRSm1V@gc-g5Exz%L){ zMi_ToAwIL>p6KoIOw*?AC}qx<5x=F_=RR07YeP>>p;Q)4dHc=9GVjd88Ny;p$xr@A zgx$!-SifgcH_f%c?9+UH9;qCuA$81R`sw188hH}Wy|l=TWcRjnBexGwVTN1uM9`?l34 z-Y)i3MwUYz*udduX=UEwJ=`E^*Gxgjc0E!awc5dqOmJ2w$)sTedN!A#!&UjNexyD=PD0T zn!ml&oMbIVF;gFU^eaH)r!NR7f)SdOx&DA2a06gqcqq@`MlQ`Lri14&QMkys-GuY2 zG3bG0vo@p8O|kdQ?@Y7vYT3A3`|cK3o<5uwlk|W}tGJF1POP-%h}$m5R1axVKBac0 z8fsyq+ip>?y6tLD0c@k8wZJ+5pL2Kf8=&PDvxlY$;G4icFETB(R;K;hbF1vPK%mNq 
zJG*<|o_=GF7A##b(z1|0MSxrrXYr%=0!24{Al0!){CNsmjq8ox9gD?$B7gU>(dnT{ z#zE}oc^YO@tCwwe0^e!c1ktJVx;GAz*zR=@od8!*1aOKEAE;5Cio*f;_*g#xgIxm1 zSH$C~;Qt(0KDig)rh1E*ZHGTN$Z4)szpS-?C^Va`1OuylsWahFi-0`s%!WBdS^oA^M>j* z(%iE#hJr?-Y)-+L!-S*G-JolSARkDM zc_hnvvOl26IG_=}xWRxN-i;x^|79r5e;JKwvZLqeNaHh_ByW&Olqt-7(nBX*+zRJD zbHMS_s_zi;fF;rPMxv{7C(~CO{mjJkgpOGdfaLu5`Hu%|^KaJJ)Q>tY>fk!sFE`sg zVcaxe&v;m{kR!p+DV|;3wLWjdpV1kOFX?z&3tgLBZfHd`w<68c@u9~O}> zXpM5+iA%YdzhbHL7!z8gxX@9Onl#hLYx-(WvS$zJYU6Y_)+Dxt&9E&{ap`|}q1JSy zKfD%h*B2`plB)Y?XIv+GApdQZ=uHbJF~z)vi59BQwj>+8XJd_{ug<7A!pdWzF>}5A z(nas@%?|9V+DCx{16xd5ca;O_HqV(+KXVm_spA;3T_ZV4TFYtNXc+TAauR)FD54_5Gx$Nf*vk_cTnyLTY1CE)MTTyjNv3YftTLzNKu=&@M zaskSd$vxk?CGk@@owbRtxe%Iwx8JIBvkz15a;C1-%-`m+|BJ@w-(&TEM1x46NbKkv zni6|zB8m%D6gy6D?g*2+rWMOQ^*#C14*%+n*2;5w;$mfucg;9Q`VG^X@5m@i5e>@J z`&Tn9nir26zV3iM`ji6Qf&HiPl;>ajkG(xub_}WSCfkuxX=Xio()ILC_W%)0-vMcx|sTOB_!BMAlQ?YbrXZY09tE;yc@+m$V=0a|op!vm3WSfHHd=OZw9p%L+$4=ZmobmxmvlO`ubpk_wtsjz}V zFKh{zxGqg3lMViE_1k^>T6f6Z-D*k6HDX%0*pBjy;I9O<1Gpeb+4cH|Ft~VA#FxtO zBfl8^G8FFG*k4kBzzsI@gae53A<00WEozR(R=>$V4W9npXJ%>D`RLHjX~!j_6O4> z@Fzt7Q?I_IZRt6*NOQF2ht^arOQu&2(#v^*eWH1ja7SALs>_3jia3?@x>(S*x)Usy zlHc!B(V1-Sm}IKtUm*H8#D>8``eA0|GajkyWV$Air|=}BeZo-QT) zFe2Ed=F+5hTsn6%0VGcw^fB$kan_d}0L~Cb_tj^zvIg-3_R471p%PAa)dyT#4<%Fdk z#c9;C-2}`$--5o0q^X8%!IbJeaXa+kvTH&}X^YtJXr{5b8%GG#cf&?SD1L5;u1S&y z%mcc)2vMl-To+g%{yuUNAhfJ07?%}|l$`1vPO_L&=Sp|LX>CsksSIwIS7yOLlxZN}h~jE=3`rd0pdZd?l6pMWZ9m-!Kn8uZZ)(xP!HhlMW&d6Zij0z6Fb@$RTbVvjYkSEb5 zu84A+-SAKi&3#?>c}mQE)=ADgiC%bHUdlXt$qRSl4K@@}@JaT(O{Yakv4Is0Aq5j( z^>;P9{4|4Bk}~^Q5`3n)+3Mg+1EEouf4pUYzGc0NVWchOFoA{IIKXI$kA`}$TFyF- z)rP^0in|4;_BjS82knvPN1TCzO|Po!aT?Ru&TV&hH9Bk*frmC=mYanr*N*1_(DdSM zgp_CjGv_Gx+AAB_?3_=_EiK;M*m134srrh`%4p)nisozA z-YZHv=Wn;vhOS~%>!yFs$z||SeZ4TO@vckb@`yWz2ay)RgBLqzo&Zym`kymDvEVq# z6xV-f1G&Xvb43y0l7z;O#H|b)rQ;LIo+b`-*gPcLaW~0?36RS1`VpV{u)E!;>)wD`(={=i* zo;KUS6p4Mb=WFP1Q{G5JXRc3>NzvimjvL-KUGVFt_vlc7XqDkJKMaI2We5&Tn0^B=0+nq;Dv%Eg#fLtuSFt6dc)x?x#otk=EtGB 
zhUUa93et&@rF^dy#W%+ZwBYF}LIprV6nI-G9;l@>(r z5hss5vNGUt``2v69v!an&|M9amcH`dZGzH7&Sp{oflTS(1&~=CzLyt}(&XBrQkPtv zZh!y8%aC)gBEuvKNpr}}!XDmBdu4n@`;*+$uF_4HkG4nT7uk4!u?X1AH9qhlQlrOD zm8@y$IlURaH-%A~L*lnX)rPpKKBB1{ZkVjHZTM`fam6(kdSm+^Av2#5e}qryNAY2} z;bDXKcwq5}{VMzF#z;h^3hJt|keDuZ&b()l{R;E<#E?r)t~>%Rfj%Q|NQJ#t7cqJ$ zAC{i83(m4Cib_TenPxF_ntTn-HJ+=a75?NUCTk<&XqaT-q$Eo}_>yArd8wT*QLes{ zMr!Wkmx`v#Z=5A5mpwRL2B-=E6f{xzJUsg^ubBSJD>{iC!7J^SjBFmNEDm0Wm&;7q zs`JN&CHc;3xGB!^n{r|7>uD>^=X)CH2{u{hi_IqhQusw&I}2Sv%4TBJID-Ch6MI@T!uu$>I*R+yPa`~5*O zYV`4s2~LwrOzKb6btD%~^{O{Ye;R36xGuk9YA1(j{?^NDNU5M?8XT;~E{s-Xg#YyU zh9YD;pKDh=M~q%S!c0q_su4Q<<)l=G8NJ_;IVD{@`pA(5LVJPJ4{D;%S!&Vo$K0n% z0JcsGPO8|8RqeB4oKV(x!IKdsyS*?bP)~`sC6h0-(mBgRAH>wnu4;ha609jQKwIum z`o79h3kP?da)iFG$elkp2i z&J5FbE+o;>-|gysfcpbtAqml{c2gLTGV{#le>&9RCJtJL?*zu=?FA?2c(3<=cJsBW z#>Gl@A5Z6?uEN*^ez(2#^%ZN|{g`ejyjwqe^SuzIqujyWwLldsC{d8{Bz z;rCrQ-L{e1UEwp)zzc51%o)`^a@5e3rOP21n?nuvaQ=XfZ~J${cXXc1-#ze=!F$Lig28@s)%hSC!X+1d4k{Hfz10-v!5TiwHp>b-RgIT+ zvV>aXj_mJV@%$l>+y`O>&f1V3pkZbU(aK{+#DvF2CNiY1poP>Wv}WYaioX9~#aG<3 z!&A4)JfZKAyfP%%hby^7ddIAEZ;_wJVW876%Z9b9%2KB{;8K*|<{W+G@r8ro{e%|S%f+@=qg?WA{|6C&J0cd}LX>fA^>1)3^;X@lH%6;3o6C!o7sp;EDL-OiS z5a(A?*Wp?l84-i)`~yng1te2s!U3*>D*dZKg2y?kG54~=v3wN5-}vm{V>+~Iw=h+M z{I2$mXUUy_4U7S71(rUp2GQQ?j$(1}WcPFQeIJJPrx_;3H|ci4RlMAC-icx$UzAyh zex1hv(_E=zfaEuslM{b^gXh&w9+wJ}?i>!rvFW_#WKd&|$2mv*x@k=8i}|8)3Ct-B zV7kd6*&R)oIlajuBq#M!)wiY>H!v}E>(8%#)O@aqj)lq6}8(<8E& zj2^tZ@HMM7Ij*?iJiKd^C~J1=ELz(!yXTQ3HC zj?0$Uw0}kX5wzNp{0xDT8m&DFlDZV_Dfq%N`bh0X;Pn?Y_&^nE<8P-fwxyQ3<=!V~ zqG>_ESt((6;5spEWE8f-wP$(^4g2ikRs6tqWr>=)l|NyoCx>lTL|qj*gR;M%c6apA z;1p7KSt)wKSf~nsLeewNG}|k;*0SO7o3~OBAGD7 zHdO-I;MSd|02lF1Bp~;;gr^UakkuAfCOx*|qP^KX!0{nEYfCV^Zp1D1T$1IAXm0%kIPCX zWl>X8&@kcp_09`z^3A+lqo@3TML*yFihi4xqP=EtfKaqKLivGBRp&IOK%rWu!Tw28 z|DcWNcbk|O{t%mUmgawiKY&)^AK`B(8fdph+7U(3%*Rv_h|iAR(Aw8YO4KcDuPy6h z5{j1XV#cWQAi2LTOsZ}!>Yq_!(gEUCI0H`?mioa8lutxAW@ZPn` ztfBeY7WfBL>Z7`U&B|>#H}(&RUL47m_dqgES%tH1{qX0{ZV{U^k0{9)zgMQ%g5u_1wtRdRACpBHn1 
zT&>j$@D=Cx&N4bbWp*RLY|W@t7A>LVXVGt+?vcfDGyx3^S@c&)nh(b$5wVhZ)esMA z2dAx_W%=Wx+Xx}?oEK*^{A*WBovOZ{1bVd{#;=%*3vF1M_xplAsdK|~aRoy`@5rnX zS>IPEn9^))YUVqlQae-$vg4Jfv^%-7yDVMXl$v9R2+n(=Uj*&Q3q_Hw39s7 zW#n3?_?nCmxfb^*p`7glG^Mlycl--RpBp=kfI3?^9Yspm2UL1t9ml3-GA?Bg|KT zIf$rmB!u{634OelTV31{^>CzgzNlN;j&%W%Wk*$;pGU7%5G4Wv{wRLH<8!Yl8?9J# z^p*k4S99VQvRv4}->$=NzY%!(ksJS+XW1^)L!Zj~BA2{r;|bTs{G&BVDeEBbyH7PY zt%B8IO)TN>K0f|h0?~WwA?#Ozk!YGVJ5$O@vG6x*@dlD!w+=dT?{uR8jkp;g8`P1S z_Q~pTTR`o_GP-Qsri$_E!%M$VAAT2ZHQ704&FkHV(((+CSLB9O<4D=1^P3sN$)sRC z!lb-Ix%D7?WyA#Cbo}yjrh2>m4u8V~O4^bkV_w&v^&pdNJoNsI9o+Pu@ zeXW(X*7d*szdUJD36c~jyIsG%KlyMSPZ76qeW)*McgD2a7PcHl0Q9YLo1LFGxIzqu zoA^mEUfq+P#!dAGye|sB7J^)BY@AFxuAF}DWb;n`ET^2-G?!DeQ2Hn~ap;%sD-VR@diSam#nF6LofV{*^eCr2d#Dc(u98z9f0PPeY9 zW*1B;s|!27`m$#P_;hMQ(CT8W20x&>!vgD0meE-uqO z9pRiXdzyN#BXv0_I+b`#>##YHPO?FA+;J%)kh2p7o`$|K3}}p!ahG81vPvz+BzQ@L z>t0mTEpijs0g~P!*Q&lP3k#q0Z6-%lUI(mWS*aBdP}s&an@q+Gg3t~cU5$mp+ues4 zo;E)3@jl=MKpn@2iXjh%t2B7v4+q<=`@Sw83}CAu)*;#DqhlNIP@{5>&44sQ;>aLN z)p^mAL8anPob)2%^arQQmkHMkkX8vs#vM}+(41dXU#xSd)1~Uw-aWMBXpp$;r*4+6 z_X#xC13KCR&I46$+0k!{+%j6OT+hf?z<>CTiuiOu`0|A7Ag?WF;)ASji;b_U@XDwfn&M+4ubomS-&nnxLe({@rF6vEQX_mNx|KSL9%YfWcKh z;jnQYlDs~5b}Ni@ehixH73K}!3b0aWZG53waSDJtZ)?PZsO+Tm#!Y0qL!IC8hvGT* z7!6g5_VGe`Kfd4%^gt03Bv-iB@&oWY2oQY2De){RabE9JL65N0d7PPaxWi0^$Of?rg*ex#WHdQ0k+oZ7EDvy4DgDWn8)Hq#h3)s)dM+?Su*% zHh|f;0n1>WMWA{)(Og(!SWUfjDQy`mHVGcm_~bf%O%!{0PXV=A^Z31ncZ9<;p!y`= z%a)qinSYgXy}4>-n9Q@~h9#I6e@_x2I>ajvvfG(F3HEjo>Z>D|oVQ5++FmwyN|ITi z`waYSsUIoMuayA(3}m~7dFnGefX4kXIbzKyI)!aQkePoml3z4_t`D6J3F{Y7dcKh0 z+eaH~fBnz^kn4Y|qnM?{c&;k5nm&L@69?WxfGfQ#a}l z-7z(oEoZ3dO`fFajVIJbm+uwR;0G;u_bN1!j@%+&Th};#|kH_NsX)2v{6bI24A+DGgfQ`n!&9yyR;lAn1_XfX9-BZm(|!9@RJ&oZ)r(|wt< z7xG<6;c<%)2|ou(!6&a|U#f)O*8bOk*B0W%cT*@T?O#$Y0aI+qu?`}68MpkC)nN9^ zNQmk9VD!eejjB#o>3*A8O*S#Bskah#MG4Ng1Ke)QcO{$zmL6|O_wSg*>Wq+lt)*Uz z4}*9V4e+b}_G-?LE}7uCq~5PdN_0tWOoUxQeDwq<9hY#xB>--qe)uS8tNRzgPEaT9 zzDNFy5zm|)4XEqUv`+-ef|UQ?U)yI_~RbK!h7`+SRVes|cQK_jtWvLoTC zaYO}`cJO 
zglak8{wGC^hdSuZkA>gZQTy47MCTi{VIEXO^EjYqZy+IK18{cv%@E;VcQbbdYy+Mi zzct7F`A%Oj<2L{s$FXa zQZj~OH7A5uX^OvrWJEvUnu_$$l)e^GO0cT{0Q>XZSo3cWO3qW;THg}wmU;CCsAks` zE`gDL1LXr1v?1|Z6B-w*IumwQ&TPo~PK!Rxb4M0qM4zV3y5?m~F@Dc*2f?>;l##mt z$8}o}0LYyA(7G>Ivw=eL_j-Cm0xLtnaM?$_O6Qj9xT75Wt$v*jdE4S)V()qikY68p zmI1Q3GG)M{uQ$&eVzGAL=Dk!qOY|kb!JY+f{!ENXiF6uZj@xe!=xGVR;?j?<^CF&N zo9YlXU&IZD)#4TkE6(0gcyoUu%trita*p=!QAoBggFar3Nruf{{1+e;bkz;kaGe^3 z)jD|M1KvwN&_eL>E?m({&H*Ai2E7D zvw{z<&%Sm#zyCO*YcXP{bR`>8{?3w|I!TQ&8AC&J>W8KMP>IZu7g@5TW~HQfVQphU zxz2jPG}|N;0(J$mvKd$7&)sccJa?WZq0&(#%I5*(W)p?Lh)#*x(s91xeop%tODX@E zlgz!Ab05pkts1#;KqGOcuUPPdh{Hzc9|J9h8UZd0U^&VlF!g@p7x2)bf~8Ekkf=bZ8inS3z>nfL!}%% zT~m2`@B#1m!UL1Ik^+7C$`IASNzh1LVUK0md?1)?b-`~Rh#a078AOH2;kqcU1NG>r;lfe`~Pauuuz-AMzJcimQ?T}MI?%4s7hGAhre?LMT_ zbX4v2hEijkv)Q53Y+wfQ_KiVpE}G>vuFGefG(p`GEKFOh;-nn6T@A6|oF(6PnwdB! zzytQ;%w%5CT_!5$jbBXDyN59mcR2v9vtQJEa=vta$DML0 zm~FnUR*XT#TB$>oy+=!E9PU6=ox`F*b}iJX_mCi$NqHrm6iWy8-g2V=E(83~Hwia` zl$>-HJ&ifxojAzcv_FE~wv@0|M4_`7x($J-qqLD~U5+oMe|`GC$TzY!_kFlAJ3}ZC z2xlRhmukMdZjXg0vhsY_66-KIYIgJhvzVVE-)twq@~0h%5fe&ZU^Rseu{~alCfiLO zszNsaS`kC6hPVlR2l5H5xFvrV>z|QOZm-3PJM+#=p&;kYbm{~p_t74%<83KW9$o;g zAV3v+);-qJ#%K<2u1{b21uRWl!-Y8Kl3WjD9*wE-7!?s1K7>=IN95MvkVi;!xKE>s zWrsSHt< z(t=l9r=*#BZi90DZWkD9s*^+H=VT4rty`mZ4P8lFU%$CszXD>W3$~Q#nP1#X=5@BlQVw->UQ1?0>;+8EsSX+kjacYYf~hYq**Oqbpkepn zT|>@?CK;z`M94Onxy0=^>=uV=4nOzM>&76|$@@LYuP3LEQ`A+y&z^h=c)+H4H z>2uBwv1R8xWkYQdi3{V`5Fb@C?Dc^KFUVy&DmTVViOASPX!r&}>{Xy3VZfj5|B+gp z;Y>5)-#yt1WEGEdU`OI7fwGX5-|tWqM^?8GOP88PKAAh5Gl}p$2Q0+O5vx3hblKWN zk6WzgB|L;uXb)|wh?ENOwm?K0pl}kIz_dduZ$q#nhm(P58674ctf@&PeCa4e6`EaM1|MHZpGB{A{ zQ6oM70R6Ou5oO!jcJ3iQgelug^U|Stf|n@c&C}C7sk7Pv$k0&XP_C}+btaS;9oJOBIeb?m zSL;M z^qOM2Q!kQ*$&_K7n8sh$2?e{_XHBSmPUoETP@n;(=4KI#X2-MYlE|bxf*cMtgYsLnbU~FQO zddi$5RcC0H^j6|6Y{(>;6BrZM92~PfA0KDN@rjQ`U75T%cLD znU>Ds2OT5-oxC!*kHJ{UIR{8aKw1<*qW-yE_)22x$SX}2pw~q@fa0zhz|fUa$Fr9) z0lXq-5yUGomP6-nE7)ke?nwz7%3rRCxU|aBwXnMeULMch22-`vfZPz^&HD!g3QT0& 
z+&BjL&K}XF99^dE^G)KMCrqc3a$qd|m7QhoHobwxuAg-(g)lI+w*(L^AA_=m(XiwQ zpqzCR@uRXOx2|HzV{Mi%3Xo=uJZhB-K64Of4*<{8-+5|)j*C$@2C+o@ z2U1T{*qhjVI#>v-nRMg`Eh$d3tHfaTuBwXZI&1Y={0QNwPh@%Vh-C~?MKFmG*-Sbv z(8t!t_sg*M=U1HyoX`R4W)(QO^7X(f1G6b*|HDr~kxKz8IP+SD`0c}1wCko-X9;tx zc*KCwLbz&|!|TUsCLc7p4cl+t%N|H2iL&7AatULfyE0PUykggO+skpLsC70Ax$=l8 zT8S-nhm*4@fST|tgV7(vG=Qr0=K%#}fVm*2l-VoD3!4h&3!p))a^H~wg)QBOXv%dn zFSoY&IQEfYq)aEU{RF9v16zYX4UI|6rvp}0w~V!10as16lP_AcM^}qYRfz_fYr+2g zIKZfSOwEoJIP-a4#@oJL$TN#s7hN?9;G5&l3?$m|-fsv_s1ob=5zs+)#^^Y&b&FpL z=RM&}5o(w4ya@p308-qaJ07mK@foK^fB{+iK^_8`VmV`HYvdx--xPf}b^aH& zg)o;}9i*!_=klT1o8vKZ+c5Tr=iD8;$l!H`dG{T^dbE$dZjtLAjQ>gO&*jObiojaA2Pz6!1cpYT(c8E|aUjX{97$|w33E5b9CbC&Ysuq^7imu#9wFIUzV>y zzhhyW$(@7(s-cXZo=amLF2S&&K1%eZ)^b0;=rbCE=JHLCLF=#@(Q$ME+qVwi{);l} zQGNBJ-{GSn-xp3TyleYK@78U##q)cB^zm=vG50t=RD%4|uxf%@MI8OGZY@~R3xClm z)x(?j1|cy2crMrbi}stP-$0W=*a)*7*{f}ZMS&wpTHlJC1v7#nL7wN)vM} zfIqdr4-??|gCfLN`?mjzlbkW-1Xf*=d=%&K0fCHXCYke_bqP8yg76!E9ebUwC&Gr4_H+4aQ*K zmYwl*ma96tZz~AAe;; z1e6M=SA_YnquV)$hL!55-Mtzc#>v9T%YLEr6*o8B≶)`Mo=TTKWDzk<%Hk1WnQz zX=lloMK$qOTgzNO^@DLp_(lK*Kb1bWK3}j~_uLAPQzQRUXnl+|cBpe(e4x->{_)e< zZ&`Y;>HAV%v~yiJEv~K*>i92A{5VwBs)y``6Gb*NF08*&zYOr4sTVg-D)i+O)nqGf z1Hl1iP=%uCXcM%=O+U2M^`(tPDU=4enK)5xY1*cj9TX8$gDew|s$ zEt+BlhQ&Ti~$HPR<=~aN_%!{{6ObDO!7MU!O71+ zrV37%@&Glqk2UjGkPOEKE&SjP&$s~V&?5v4_w!)8R1MjyHu=uad5e5;g6BKR zNM zBsadB{OBO~t;>?N&4mYF?E9xGYVo_~>WUu=&@M3{h?1i#O?AU)Y50klA%Z8rj;_A} ztKtb8>_DE9|ZogCTT%UnM+}K?Tl*JOfODfSeswC%PQ2PxBO@kd^xV}CE&Kq za?%T+CQgJyM5tmo76Sc7a@hIl<#Xy$N3{vkUF%KrUK02D6yAj=Cc4}pN2t%P9#VY1 zvJC}K=+Zo(x_qIwXUXk2%%bZ zfSH?u=j_Dq;MYl;!4-Do7vGr6*YeEfa7jK9=ia)|72pz0_y+GF4j;kg21$U2IbEj= zRuSMx-ptP|duxW>idXCq@n&IpZakE%Pg{;u+LT|7Bc((G*>rX5sV`pe*3a0R;&2<= zVYH2ERrrXRV3#FcqAmUDqv#H^6t$=IRkXW+KqO(au7k&jByEU1Ke=rFj4ExFH<=Vw z6w!lF1V&g=%FiF=nzxzYjgtcsR+8)6o3f;;Pm@kK5u>A5=%rw-uMiG_D zFJ$rfGC57RuLOnmBv{XVMlc=O%^|suZc!woDnry>HlSLAowHYOMfTa-R%$%d3E&uy z|0y3=P|JfX3ma@@3E_A<>3VRy7~@xlPO_i1t%tuEb-HG`#z)l(T1cMZ!e^g1QFYzQbb-#tJZFEET}Tf4Eo 
zXL$4FW?A3AbyoY|IPd+N-{Iud$?-JxJ Date: Fri, 25 Jul 2025 12:59:53 +0000 Subject: [PATCH 09/11] add final round specification --- contest.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/contest.md b/contest.md index 9a69ca4..44e1a01 100644 --- a/contest.md +++ b/contest.md @@ -2,6 +2,13 @@ ## 0. 赛前须知 +### 0.0 决赛题目 + +![image](img/final_round_question.jpg) + +- 由于0-1卡间已断链,则无需返回此条链路间的建链请求 +- 算法分析器不支持断链场景,请勿用算法分析器测试,请直接上板测试 + ### 0.1 技能要求 1. 熟悉 C++14 编程语言 -- Gitee From ccef2adbb99173d90e56101a0fc781e1e7ad3a77 Mon Sep 17 00:00:00 2001 From: fangmiao Date: Sat, 26 Jul 2025 03:35:27 +0000 Subject: [PATCH 10/11] update contest.md. according to the final round Signed-off-by: fangmiao --- contest.md | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/contest.md b/contest.md index 44e1a01..3821d2b 100644 --- a/contest.md +++ b/contest.md @@ -7,7 +7,6 @@ ![image](img/final_round_question.jpg) - 由于0-1卡间已断链,则无需返回此条链路间的建链请求 -- 算法分析器不支持断链场景,请勿用算法分析器测试,请直接上板测试 ### 0.1 技能要求 @@ -48,11 +47,11 @@ HCCL 资料: - 15 分功能分:15 个算法分析器用例,每个 1 分,通过得 1 分,不通过得 0 分 - > 5 种数据量:512k/2m/64m/1g/4g,3 种数据格式:int8/fp16/fp32 + > 5 种数据量:1k/1m/64m/1g/4g,3 种数据格式:int8/fp16/fp32 - 75 分性能分:3 个 HCCLTest 用例,每个 25 分,不通过得 0 分,通过则按照性能计分,性能最佳得满分,按照排名依次递减 - > 3 种数据量:512k/2m/64m,1 种数据格式:fp32 + > 3 种数据量:1k/1m/1g,1 种数据格式:fp32 > > 性能标准:基于 HCCLTest 工具测试的带宽使用量(字段:`alg_bandwidth(GB/s)`)作为评判标准,数值越高越好 @@ -113,7 +112,7 @@ ssh root@ip -p port ```bash cd /home/hccluser -git clone https://gitee.com/ascend/cann-hccl.git -b r1.5.1 +git clone https://gitee.com/ascend/cann-hccl.git -b r1.5.2 ``` ### 4.2 IDE 远程开发 @@ -128,9 +127,9 @@ git clone https://gitee.com/ascend/cann-hccl.git -b r1.5.1 在 HCCL 软件架构中,`Operator` 负责算法选择,`Exeutor` 负责算法编排。为简化流程,选手只需实现以下内容: 1. [custom_all_reduce_operator.cc](src/domain/collective_communication/algorithm/impl/operator/custom_all_reduce_operator.cc) 中编写算法选择逻辑 -2. 
[coll_custom_small_all_reduce_mesh_executor.cc](src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_small_all_reduce_mesh_executor.cc) 中编写小数据量(512K)场景的 AllReduce 算法 -3. [coll_custom_medium_all_reduce_mesh_executor.cc](src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_medium_all_reduce_mesh_executor.cc) 中编写中等数据量(2M)场景的 AllReduce 算法 -4. [coll_custom_huge_all_reduce_mesh_executor.cc](src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_huge_all_reduce_mesh_executor.cc) 中编写大数据量(64M)场景的 AllReduce 算法 +2. [coll_custom_small_all_reduce_mesh_executor.cc](src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_small_all_reduce_mesh_executor.cc) 中编写小数据量(1K)场景的 AllReduce 算法 +3. [coll_custom_medium_all_reduce_mesh_executor.cc](src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_medium_all_reduce_mesh_executor.cc) 中编写中等数据量(1M)场景的 AllReduce 算法 +4. 
[coll_custom_huge_all_reduce_mesh_executor.cc](src/domain/collective_communication/algorithm/impl/coll_executor/coll_all_reduce/coll_custom_huge_all_reduce_mesh_executor.cc) 中编写大数据量(1G)场景的 AllReduce 算法 > 【注意】上述代码文件中,选手需要实现的内容已在代码注释中标明 @@ -202,12 +201,12 @@ cd /home/hccluser/Ascend/ascend-toolkit/latest/tools/hccl_test make MPI_HOME=/home/hccluser/mpich ASCEND_DIR=/home/hccluser/Ascend/ascend-toolkit/latest # 执行 HCCL Test -# 512K -mpirun -np 4 taskset -c 0,2,4,6 ./bin/all_reduce_test -b 512k -e 512k -d fp32 -o sum -p 4 -w 100 -n 500 -# 2M -mpirun -np 4 taskset -c 0,2,4,6 ./bin/all_reduce_test -b 2m -e 2m -d fp32 -o sum -p 4 -w 100 -n 500 -# 64M -mpirun -np 4 taskset -c 0,2,4,6 ./bin/all_reduce_test -b 64m -e 64m -d fp32 -o sum -p 4 -w 100 -n 500 +# 1K +mpirun -np 4 taskset -c 0,2,4,6 ./bin/all_reduce_test -b 1k -e 1k -d fp32 -o sum -p 4 -w 100 -n 500 +# 1M +mpirun -np 4 taskset -c 0,2,4,6 ./bin/all_reduce_test -b 1m -e 1m -d fp32 -o sum -p 4 -w 100 -n 500 +# 1G +mpirun -np 4 taskset -c 0,2,4,6 ./bin/all_reduce_test -b 1g -e 1g -d fp32 -o sum -p 4 -w 100 -n 500 ``` 各参数解释如下,详细说明可参考:[昇腾文档中心-HCCL 性能测试工具使用指南][9] @@ -215,14 +214,14 @@ mpirun -np 4 taskset -c 0,2,4,6 ./bin/all_reduce_test -b 64m -e 64m -d fp32 -o s [9]: https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/82RC1alpha003/devaids/hccltool/HCCLpertest_16_0001.html ```bash -mpirun -np 4 \ # MPI 进程数量 - taskset -c 0,2,4,6 \ # 将 MPI 进程绑定到 0,2,4,6 CPU 核(设置 CPU 亲和性,避免操作系统调度干扰,降低波动) +mpirun -np 8 \ # MPI 进程数量 + taskset -c 0,2,4,6,8,10,12,14\ # 将 MPI 进程绑定到 0,2,4,6,8,10,12,14 CPU 核(设置 CPU 亲和性,避免操作系统调度干扰,降低波动) ./bin/all_reduce_test \ # 可执行文件路径 - -b 512k \ # 测试数据大小的最小值,单位:Byte - -e 512k \ # 测试数据大小的最大值,单位:Byte + -b 1k \ # 测试数据大小的最小值,单位:Byte + -e 1k \ # 测试数据大小的最大值,单位:Byte -d fp32 \ # 测试数据的数据类型 -o sum \ # Reduce 操作类型 - -p 4 \ # NPU 数量 + -p 8 \ # NPU 数量 -w 100 \ # 预热迭代次数,不计入性能统计 -n 500 # 迭代次数 ``` @@ -243,13 +242,13 @@ export HCCL_TEST_PROFILING_PATH=/home/hccluser/prof # 执行 HCCLTest 用例 # 会在 /home/hccluser/prof 
目录下生成 4 个文件夹,对应每张 NPU 卡 cd /home/hccluser/Ascend/ascend-toolkit/latest/tools/hccl_test -mpirun -np 4 taskset -c 0,2,4,6 ./bin/all_reduce_test -b 512k -e 512k -d fp32 -o sum -p 4 -w 100 -n 500 +mpirun -np 8 taskset -c 0,2,4,6,8,10,12,14 ./bin/all_reduce_test -b 1k -e 1k -d fp32 -o sum -p 8 -w 100 -n 500 # 导出 Profiling 结果 cd /home/hccluser/prof msprof --export=on --output=./ -# 把每张 NPU 的 Profiling 结果复制到 timeline 目录,包含 4 个 json 文件 +# 把每张 NPU 的 Profiling 结果复制到 timeline 目录,包含 8 个 json 文件 mkdir -p timeline cp -i PROF*/mindstudio_profiler_output/msprof*.json timeline/ ``` -- Gitee From 88a14e8ceaa198f37336f557e8a5972c33202adc Mon Sep 17 00:00:00 2001 From: fangmiao Date: Sat, 26 Jul 2025 03:38:09 +0000 Subject: [PATCH 11/11] update contest.md. Signed-off-by: fangmiao --- contest.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/contest.md b/contest.md index 3821d2b..3135574 100644 --- a/contest.md +++ b/contest.md @@ -77,6 +77,10 @@ ssh root@ip -p port | |-- davinci1 # NPU2 | |-- davinci2 # NPU3 | `-- davinci3 # NPU4 +| |-- davinci4 # NPU5 +| |-- davinci5 # NPU6 +| |-- davinci6 # NPU7 +| `-- davinci7 # NPU8 |-- /usr/local/Ascend | `-- driver # NPU 驱动安装目录 |-- /home/hccluser/Ascend -- Gitee