From ea64c89f5d228e7b1ffd5b6ce4cfa929d6e21151 Mon Sep 17 00:00:00 2001 From: 4ever Date: Fri, 5 Dec 2025 22:38:35 +1030 Subject: [PATCH] update train and test for powerflownet, create readme_zh and readme_en --- .../applications/PowerFlowNet/README.md | 268 ++++++ .../applications/PowerFlowNet/README_EN.md | 268 ++++++ .../README_MINDSPORE_MIGRATION.md | 436 ---------- MindEnergy/applications/PowerFlowNet/infer.py | 411 +++++++++ .../PowerFlowNet/notebook_mindspore.ipynb | 547 ++++++++++++ .../PowerFlowNet/requirements.txt | 8 + .../applications/PowerFlowNet/src/__init__.py | 9 +- .../PowerFlowNet/src/argument_parser.py | 6 +- .../PowerFlowNet/src/custom_loss_functions.py | 6 +- .../PowerFlowNet/src/evaluation.py | 2 +- .../applications/PowerFlowNet/src/gnn_ops.py | 808 +++++++++--------- .../applications/PowerFlowNet/src/mpn.py | 61 +- .../PowerFlowNet/src/power_flow_data.py | 16 +- .../applications/PowerFlowNet/src/training.py | 2 +- MindEnergy/applications/PowerFlowNet/train.py | 513 +++++++++++ 15 files changed, 2465 insertions(+), 896 deletions(-) create mode 100644 MindEnergy/applications/PowerFlowNet/README.md create mode 100644 MindEnergy/applications/PowerFlowNet/README_EN.md delete mode 100644 MindEnergy/applications/PowerFlowNet/README_MINDSPORE_MIGRATION.md create mode 100644 MindEnergy/applications/PowerFlowNet/infer.py create mode 100644 MindEnergy/applications/PowerFlowNet/notebook_mindspore.ipynb create mode 100644 MindEnergy/applications/PowerFlowNet/requirements.txt create mode 100644 MindEnergy/applications/PowerFlowNet/train.py diff --git a/MindEnergy/applications/PowerFlowNet/README.md b/MindEnergy/applications/PowerFlowNet/README.md new file mode 100644 index 000000000..6cd8ccf3c --- /dev/null +++ b/MindEnergy/applications/PowerFlowNet/README.md @@ -0,0 +1,268 @@ +# PowerFlowNet - MindSpore 实现 + +MindSpore 版 PowerFlowNet 的完整实现,支持 CPU 和 Ascend NPU 设备。 + +[English Version](README_EN.md) + +## 概述 + +PowerFlowNet 
利用消息传递图神经网络(GNN)进行高质量的电力潮流近似计算。本仓库提供完整的MindSpore 实现,包含 11 种 GNN 架构变体和完整的数据处理管道。 + +## 项目特点 + +✅ **完全自实现** - 零外部 GNN 库依赖(MessagePassing、TAGConv、degree 等) +✅ **Ascend NPU 优化** - 针对华为 Ascend 硬件优化,支持 PYNATIVE_MODE 高效运行 +✅ **11 种模型架构** - MLP、GCN、MPN 及其 7 种变体 +✅ **双数据格式支持** - PowerFlowData(12D)和 PowerFlowDataV2(4D,推荐) +✅ **经过验证** - 完整的对齐测试和数值稳定性验证 +✅ **Apache 2.0 License** - 基于原始 MIT 版本的合法衍生 + +## 项目结构 + +```text +powerflownet/ +├── src/ # 核心源代码 +│ ├── __init__.py # 包导出(MPN、PowerFlowData、PowerFlowDataV2) +│ ├── argument_parser.py # 参数解析(JSON 配置 + CLI) +│ ├── gnn_ops.py # GNN 操作(MessagePassing、TAGConv、degree) +│ ├── cpu_npu_ops.py # CPU/Ascend 兼容层 +│ ├── data_utils.py # 数据工具(Data、DataLoader、InMemoryDataset) +│ ├── power_flow_data.py # 电力潮流数据处理(5 个类,2 种格式) +│ ├── mpn.py # 消息传递网络(9 个 MPN 变体) +│ ├── gcn.py # 图卷积网络(GCN、SkipGCN) +│ ├── mlp.py # MLP 基线模型 +│ ├── training.py # 训练工具和回调函数 +│ ├── evaluation.py # 评估指标和验证 +│ ├── custom_loss_functions.py # 自定义损失函数 +│ └── __pycache__/ # Python 缓存 +├── configs/ # 配置文件 +│ └── config.py # 设备配置和 MindSpore 初始化 +├── data/ # 数据目录 +│ └── mindspore/ # MindSpore 格式数据(处理后和原始) +├── models/ # 保存的模型检查点 +│ ├── 14/ # 14 节点系统模型 +│ └── 14v2/ # 14 节点系统 V2 格式模型 +├── logs/ # 训练日志和结果 +│ ├── 14/ # 12D 数据格式训练日志 +│ └── 14v2/ # 4D V2 格式训练日志(推荐) +├── README.md # 中文文档 +├── README_EN.md # 英文文档 +├── README_MINDSPORE_MIGRATION.md # 迁移详细文档 +├── train.py # 训练脚本(原始 12D 格式) +├── test.py # 评估脚本 +├── requirements.txt # Python 依赖 +└── LICENSE # Apache 2.0 许可证 +``` + +## 快速开始 + +### 安装环境 + +```bash +# 创建 conda 环境 +conda create -n mind python=3.9 +conda activate mind + +# 安装依赖 +pip install -r requirements.txt +``` + +### 训练模型 + +```bash +# MLP 基线模型(快速) +python train.py --model mlp --case 14 --epochs 20 + +# MPN 消息传递网络(推荐) +python train.py --model mpn --case 14 --epochs 20 + +# GCN 图卷积网络 +python train.py --model gcn --case 14 --epochs 20 + +# 使用 V2 数据格式(推荐,4D 输入) +# 需要在 train.py 中切换到 PowerFlowDataV2 +``` + +### 评估模型 + +```bash +# 评估训练好的模型 +python test.py --model mlp --run_id 
+``` + +## 支持的模型 + +### 基础模型(3 种) + +| 模型 | 描述 | 参数量 | +|------|------|--------| +| `mlp` | 多层感知机基线 | 小 | +| `gcn` | 图卷积网络 | 中 | +| `mpn` | 消息传递网络 | 中 | + +### MPN 变体(8 种) + +| 模型 | 描述 | +|------|------| +| `skip_mpn` | 带跳接连接的 MPN | +| `mask_embed_mpn` | 带掩码嵌入的 MPN | +| `multi_mpn` | 多步消息传递 + 卷积 | +| `mask_embed_multi_mpn` | 掩码嵌入 + 多步 MP | +| `mask_embed_multi_mpn_nomp` | 掩码嵌入 + 多步卷积(无 MP) | +| `mpn_simplenet` | 简化的 MPN | +| `multi_conv_net` | 多平行卷积 | + +## 数据格式 + +### V2 格式(推荐,4D 输入) + +最优化和推荐的格式,适合 Ascend NPU: + +```text +├── node_features.npy # (N_samples, N_nodes, 4) - 归一化功率 +├── edge_features.npy # (N_samples, N_edges, 2) - 阻抗 +└── edge_index.npy # (2, N_edges) - 边连接 +``` + +### 原始格式(12D 输入) + +数据输入格式: + +```text +├── node_features.npy # (N_samples, N_nodes, 9) - one-hot + 特征 +├── edge_features.npy # (N_samples, N_edges, 7) - 多种边属性 +└── edge_index.npy # (2, N_edges) - 边连接 +``` + +数据集下载:[Surf Drive 链接](https://surfdrive.surf.nl/files/index.php/s/Qw4RHLvI2RPBIBL) + +## 关键特性说明 + +### 1. 完全自实现的 GNN 操作 + +- **MessagePassing**: 通用图神经网络基类,支持自定义聚合函数 +- **TAGConv**: 拓扑自适应图卷积,支持 k-hop 邻域聚合 +- **degree 函数**: 计算图节点度数,支持加权度数 + +### 2. Ascend NPU 优化 + +- PYNATIVE_MODE + JIT 级别 O0,确保 Ascend 兼容性 +- CPU/Ascend 兼容的操作层(gather、scatter、where) +- 无分布式模式强制(RANK_TABLE_FILE 移除) + +### 3. 数据处理管道 + +- **PowerFlowData**: 灵活的多格式数据加载(12D 格式) +- **PowerFlowDataV2**: 优化的向量化数据处理(4D 格式) +- **图批处理**: 支持将多个图合并为单一批次 +- **物理约束**: 归一化处理和特征约束 + +### 4. 
完整的训练框架 + +- 灵活的参数解析(JSON 配置 + CLI) +- 训练回调和早停机制 +- 完整的评估指标(MAE、MSE、RMSE 等) + +## 环境要求 + +- **MindSpore**: >= 2.7.0 +- **Python**: 3.9.0 +- **NumPy**: >= 1.19.0 +- **tqdm**: 进度条 +- **matplotlib**: 可选,用于可视化 + +## 许可证说明 + +本项目采用 Apache 2.0 许可证。代码基于以下原始项目: + +**原始项目**:[PowerFlowNet (ericyangyu/PowerFlowNet)](https://github.com/stavrosorf/poweflownet) + +- 原始许可证:MIT License +- 迁移内容:框架适配、数据处理、模型架构 + +**主要改动**: + +- MindSpore 框架迁移 +- Ascend NPU 针对性优化 +- 数据处理管道重构和优化 + +## 引用 + +如果您使用本实现,请引用原始论文: + +```bibtex +@article{LIN2024110112, + title = {PowerFlowNet: Power flow approximation using message passing Graph Neural Networks}, + journal = {International Journal of Electrical Power & Energy Systems}, + volume = {160}, + pages = {110112}, + year = {2024}, + issn = {0142-0615}, + doi = {https://doi.org/10.1016/j.ijepes.2024.110112}, + author = {Nan Lin and Stavros Orfanoudakis and Nathan Ordonez Cardenas and Juan S. Giraldo and Pedro P. Vergara}, +} +``` + +## 快速参考 + +### 导入模型和数据 + +```python +from src import MPN, PowerFlowDataV2 +from src.data_utils import DataLoader + +# 加载模型 +model = MPN(nfeature_dim=4, efeature_dim=2, output_dim=4, + hidden_dim=64, n_gnn_layers=3, k=3, dropout_rate=0.1) + +# 加载数据 +dataset = PowerFlowDataV2(data_path='data/mindspore', case=14) +loader = DataLoader(dataset, batch_size=32) +``` + +### 训练循环 + +```python +import mindspore as ms +from mindspore import nn + +optimizer = nn.optim.Adam(model.trainable_params(), learning_rate=1e-3) +loss_fn = nn.MSELoss() + +for epoch in range(20): + for batch in loader: + def forward_fn(data): + pred = model(data) + loss = loss_fn(pred, data.y) + return loss + + loss, grads = ms.value_and_grad(forward_fn, weights=model.trainable_params())(batch) + optimizer(grads) +``` + +## 故障排查 + +### 问题:Ascend 编译错误 + +**症状**:`RuntimeError: Can not find kernel tensor for node` +**解决**:确保在 config.py 中设置了正确的设备模式: + +```python +ms.set_context(device_target="Ascend", mode=ms.PYNATIVE_MODE, jit_config=ms.JitConfig(jit_level="O0")) 
+``` + +### 问题:内存溢出 + +**症状**:OOM 错误 +**解决**:减小批次大小或使用 PowerFlowDataV2(更高效的内存使用) + +### 问题:数据加载失败 + +**症状**:文件未找到 +**解决**:确保数据文件在 `data/mindspore/processed/` 目录中 + +## 文档资源 + +- [src/argument_parser.py](src/argument_parser.py) - 参数解析文档。 +- [src/mpn.py](src/mpn.py) - MPN 架构说明。 +- [src/power_flow_data.py](src/power_flow_data.py) - 数据处理详解。 diff --git a/MindEnergy/applications/PowerFlowNet/README_EN.md b/MindEnergy/applications/PowerFlowNet/README_EN.md new file mode 100644 index 000000000..8bab885e9 --- /dev/null +++ b/MindEnergy/applications/PowerFlowNet/README_EN.md @@ -0,0 +1,268 @@ +# PowerFlowNet - MindSpore Implementation + +A complete PowerFlowNet in MindSpore version, supporting both CPU and Ascend NPU devices. + +[中文版本](README.md) + +## Overview + +PowerFlowNet leverages Message Passing Graph Neural Networks (GNNs) for high-quality power flow approximation. This repository provides a MindSpore version, featuring 11 GNN architecture variants and a complete data processing pipeline. 
+ +## Features + +✅ **Self-Implemented** - Zero external GNN library dependency (MessagePassing, TAGConv, degree) +✅ **Ascend NPU Optimized** - Optimized for Huawei Ascend hardware, efficient PYNATIVE_MODE execution +✅ **11 Model Architectures** - MLP, GCN, MPN and 8 variants +✅ **Dual Data Format Support** - PowerFlowData (12D) and PowerFlowDataV2 (4D, recommended) +✅ **Fully Verified** - Complete alignment tests and numerical stability verification +✅ **Apache 2.0 License** - Legal derivative of the original MIT version + +## Project Structure + +```text +powerflownet/ +├── src/ # Core source code +│ ├── __init__.py # Package exports (MPN, PowerFlowData, PowerFlowDataV2) +│ ├── argument_parser.py # Argument parsing (JSON config + CLI) +│ ├── gnn_ops.py # GNN operations (MessagePassing, TAGConv, degree) +│ ├── cpu_npu_ops.py # CPU/Ascend compatibility layer +│ ├── data_utils.py # Data utilities (Data, DataLoader, InMemoryDataset) +│ ├── power_flow_data.py # Power flow data processing (5 classes, 2 formats) +│ ├── mpn.py # Message Passing Networks (9 MPN variants) +│ ├── gcn.py # Graph Convolutional Networks (GCN, SkipGCN) +│ ├── mlp.py # MLP baseline model +│ ├── training.py # Training utilities and callbacks +│ ├── evaluation.py # Evaluation metrics and validation +│ ├── custom_loss_functions.py # Custom loss functions +│ └── __pycache__/ # Python cache +├── configs/ # Configuration files +│ └── config.py # Device configuration and MindSpore initialization +├── data/ # Data directory +│ └── mindspore/ # MindSpore format data (processed and raw) +├── models/ # Saved model checkpoints +│ ├── 14/ # 14-bus system models +│ └── 14v2/ # 14-bus system V2 format models +├── logs/ # Training logs and results +│ ├── 14/ # Training logs for 12D data format +│ └── 14v2/ # Training logs for 4D V2 format (recommended) +├── README.md # Chinese documentation +├── README_EN.md # English documentation +├── README_MINDSPORE_MIGRATION.md # Detailed migration guide +├── train.py # 
Training script (original 12D format) +├── test.py # Evaluation script +├── requirements.txt # Python dependencies +└── LICENSE # Apache 2.0 License +``` + +## Quick Start + +### Installation + +```bash +# Create conda environment +conda create -n mind python=3.9 +conda activate mind + +# Install dependencies +pip install -r requirements.txt +``` + +### Training + +```bash +# MLP baseline (fast) +python train.py --model mlp --case 14 --epochs 20 + +# MPN Message Passing Network (recommended) +python train.py --model mpn --case 14 --epochs 20 + +# GCN Graph Convolutional Network +python train.py --model gcn --case 14 --epochs 20 + +# Using V2 data format (recommended, 4D input) +# Switch to PowerFlowDataV2 in train.py +``` + +### Evaluation + +```bash +# Evaluate trained model +python test.py --model mlp --run_id +``` + +## Supported Models + +### Base Models (3 types) + +| Model | Description | Parameters | +|-------|-------------|------------| +| `mlp` | Multi-Layer Perceptron baseline | Small | +| `gcn` | Graph Convolutional Network | Medium | +| `mpn` | Message Passing Network | Medium | + +### MPN Variants (8 types) + +| Model | Description | +|-------|-------------| +| `skip_mpn` | MPN with skip connections | +| `mask_embed_mpn` | MPN with mask embedding | +| `multi_mpn` | Multi-step MP + convolution | +| `mask_embed_multi_mpn` | Mask embedding + multi-step MP | +| `mask_embed_multi_mpn_nomp` | Mask embedding + multi-step conv (no MP) | +| `mpn_simplenet` | Simplified MPN | +| `multi_conv_net` | Multi-parallel convolutions | + +## Data Formats + +### V2 Format (Recommended, 4D input) + +Optimized and recommended format for Ascend NPU: + +```text +├── node_features.npy # (N_samples, N_nodes, 4) - Normalized power +├── edge_features.npy # (N_samples, N_edges, 2) - Impedance +└── edge_index.npy # (2, N_edges) - Edge connectivity +``` + +### Original Format (12D input) + +Data input format: + +```text +├── node_features.npy # (N_samples, N_nodes, 9) - one-hot + 
features +├── edge_features.npy # (N_samples, N_edges, 7) - Multiple edge attributes +└── edge_index.npy # (2, N_edges) - Edge connectivity +``` + +Download datasets from: [Surf Drive Link](https://surfdrive.surf.nl/files/index.php/s/Qw4RHLvI2RPBIBL) + +## Key Features + +### 1. Self-Implemented GNN Operations + +- **MessagePassing**: Generic GNN base class supporting custom aggregation +- **TAGConv**: Topology-aware graph convolution with k-hop neighborhood aggregation +- **degree function**: Compute node degree in graphs, supports weighted degree + +### 2. Ascend NPU Optimization + +- PYNATIVE_MODE + JIT level O0 for Ascend compatibility +- CPU/Ascend compatible operation layer (gather, scatter, where) +- No forced distributed mode (RANK_TABLE_FILE removed) + +### 3. Data Processing Pipeline + +- **PowerFlowData**: Flexible multi-format data loading (12D format) +- **PowerFlowDataV2**: Optimized vectorized data processing (4D format) +- **Graph batching**: Merge multiple graphs into single batch +- **Physics constraints**: Normalization and feature constraints + +### 4. Complete Training Framework + +- Flexible argument parsing (JSON config + CLI) +- Training callbacks and early stopping +- Complete evaluation metrics (MAE, MSE, RMSE, etc.) + +## Requirements + +- **MindSpore**: >= 2.7.0 +- **Python**: 3.9.0 +- **NumPy**: >= 1.19.0 +- **tqdm**: Progress bar +- **matplotlib**: Optional, for visualization + +## License + +This project is licensed under the Apache 2.0 License. 
The code is derived from: + +**Original Project**: [PowerFlowNet (ericyangyu/PowerFlowNet)](https://github.com/stavrosorf/poweflownet) + +- Original License: MIT License +- Migrated Content: Framework adaptation, data processing, model architecture + +**Major Changes**: + +- MindSpore framework migration +- Ascend NPU targeted optimization +- Data processing pipeline refactoring and optimization + +## Citation + +If you use this implementation, please cite the original paper: + +```bibtex +@article{LIN2024110112, + title = {PowerFlowNet: Power flow approximation using message passing Graph Neural Networks}, + journal = {International Journal of Electrical Power & Energy Systems}, + volume = {160}, + pages = {110112}, + year = {2024}, + issn = {0142-0615}, + doi = {https://doi.org/10.1016/j.ijepes.2024.110112}, + author = {Nan Lin and Stavros Orfanoudakis and Nathan Ordonez Cardenas and Juan S. Giraldo and Pedro P. Vergara}, +} +``` + +## Quick Reference + +### Import Models and Data + +```python +from src import MPN, PowerFlowDataV2 +from src.data_utils import DataLoader + +# Load model +model = MPN(nfeature_dim=4, efeature_dim=2, output_dim=4, + hidden_dim=64, n_gnn_layers=3, k=3, dropout_rate=0.1) + +# Load data +dataset = PowerFlowDataV2(data_path='data/mindspore', case=14) +loader = DataLoader(dataset, batch_size=32) +``` + +### Training Loop + +```python +import mindspore as ms +from mindspore import nn + +optimizer = nn.optim.Adam(model.trainable_params(), learning_rate=1e-3) +loss_fn = nn.MSELoss() + +for epoch in range(20): + for batch in loader: + def forward_fn(data): + pred = model(data) + loss = loss_fn(pred, data.y) + return loss + + loss, grads = ms.value_and_grad(forward_fn, weights=model.trainable_params())(batch) + optimizer(grads) +``` + +## Troubleshooting + +### Issue: Ascend Compilation Error + +**Symptom**: `RuntimeError: Can not find kernel tensor for node` +**Solution**: Ensure correct device mode in config.py: + +```python 
+ms.set_context(device_target="Ascend", mode=ms.PYNATIVE_MODE, jit_config=ms.JitConfig(jit_level="O0")) +``` + +### Issue: Out of Memory + +**Symptom**: OOM error +**Solution**: Reduce batch size or use PowerFlowDataV2 (more memory efficient) + +### Issue: Data Loading Failed + +**Symptom**: File not found +**Solution**: Ensure data files are in `data/mindspore/processed/` directory + +## Documentation Resources + +- [src/argument_parser.py](src/argument_parser.py) - Argument parsing documentation. +- [src/mpn.py](src/mpn.py) - MPN architecture explanation. +- [src/power_flow_data.py](src/power_flow_data.py) - Data processing details. diff --git a/MindEnergy/applications/PowerFlowNet/README_MINDSPORE_MIGRATION.md b/MindEnergy/applications/PowerFlowNet/README_MINDSPORE_MIGRATION.md deleted file mode 100644 index 69599b419..000000000 --- a/MindEnergy/applications/PowerFlowNet/README_MINDSPORE_MIGRATION.md +++ /dev/null @@ -1,436 +0,0 @@ -# MindSpore PowerFlowNet 迁移文档 - -## 概述 - -本文档说明 PowerFlowNet 从 PyTorch 到 MindSpore 的迁移进度,以及如何使用 MindSpore 版本进行训练和测试。 - -## 迁移状态 - -### ✅ 已完成迁移 - -- **src/** - 所有神经网络模型(MLP、MPN、GCN 等 9 个模型) -- **gnn_ops.py** - 自定义 GNN 操作(TAGConv、GCNConv 等) -- **custom_loss_functions.py** - 3 种自定义损失函数 -- **power_flow_data.py** - 完整数据加载系统(遗留格式和 V2 格式) -- **data_utils.py** - 数据结构和工具函数 -- **cpu_npu_ops.py** - CPU/NPU/Ascend 兼容操作集 -- **configs/config.py** - 设备配置和初始化 - -### 📝 本 PR 包含 - -- ✅ `src/` 文件夹:11 个 Python 文件,所有模型和工具 -- ✅ `configs/` 文件夹:1 个 Python 文件,设备管理和配置 -- ✅ 每个文件都包含 Apache License 2.0 copyright 头 -- ✅ 完整的数据加载和处理管道 -- ✅ Ascend 设备优化 - -### ⚠️ 本 PR 不包含 - -- 训练脚本(train.py) -- 测试脚本 -- 数据文件和预训练模型 -- 基准测试代码 - -## 项目结构 - -```text -powerflownet/ -├── src/ -│ ├── __init__.py # 模块导出和公共接口 -│ ├── mlp.py # MLP 模型实现 -│ ├── mpn.py # MPN、SkipMPN、MaskEmbdMPN 等多个 GNN 模型 -│ ├── gcn.py # Graph Convolutional Network -│ ├── gnn_ops.py # GNN 基础操作 (MessagePassing, TAGConv, GCNConv) -│ ├── custom_loss_functions.py # 自定义损失函数 (MaskedL2Loss, PowerImbalance, etc.) 
-│ ├── power_flow_data.py # 数据加载类 (PowerFlowData, PowerFlowDataV2 等) -│ ├── data_utils.py # 数据工具函数和数据结构 -│ ├── cpu_npu_ops.py # CPU/NPU/Ascend 兼容操作 -│ ├── argument_parser.py # 参数解析工具 -│ ├── training.py # 训练工具函数(可选) -│ └── evaluation.py # 评估工具函数(可选) -├── configs/ -│ └── config.py # 设备配置和初始化 (DeviceConfig 类) -└── README_MINDSPORE_MIGRATION.md # 本文档 -``` - -### 核心模块说明 - -| 模块 | 功能 | 关键内容 | -|------|------|---------| -| **mlp.py** | MLP 模型 | MLPNet 类 | -| **mpn.py** | 消息传递网络族 | MPN, SkipMPN, MaskEmbdMPN, MultiMPN, MaskEmbdMultiMPN, MaskEmbdMultiMPNNoMP, MultiConvNet, MPNSimplenet, WrappedMultiConv | -| **gcn.py** | 图卷积网络 | GCNNet 类 | -| **gnn_ops.py** | GNN 基础操作 | MessagePassing, TAGConv, GCNConv, degree 等 | -| **custom_loss_functions.py** | 损失函数 | MaskedL2Loss, PowerImbalance, MixedMSEPowerImbalance | -| **power_flow_data.py** | 数据加载 | PowerFlowData, PowerFlowDataLoader, PowerFlowDataV2, PowerFlowDataLoaderV2 | -| **data_utils.py** | 数据工具 | Data, InMemoryDataset, Graph, DataLoader 等数据结构 | -| **cpu_npu_ops.py** | Ascend 优化操作 | gather, pow, where, degree, randint 等在 Ascend 上的优化实现 | -| **config.py** | 设备配置 | DeviceConfig 类,支持 CPU/GPU/Ascend 设备初始化 | - -## MindSpore 版本兼容性 - -- **推荐**: MindSpore 2.7.0+ -- **设备支持**: - - Ascend (华为云芯片) - -## Ascend 设备特殊处理 - -本迁移包含针对 Ascend 设备的重要优化: - -### 环境变量处理 - -```python -# 训练脚本需要在导入之前添加以下代码 -if 'RANK_TABLE_FILE' in os.environ: - del os.environ['RANK_TABLE_FILE'] -``` - -**原因**: `RANK_TABLE_FILE` 会强制 Ascend 启用分布式训练模式,导致 JIT level 被设置为 O2,造成优化器编译问题。 - -### 上下文配置 - -```python -import mindspore as ms - -# 使用 PYNATIVE_MODE(推荐) -ms.set_context(mode=ms.PYNATIVE_MODE) - -# 对于 Ascend,禁用 JIT 优化 -if device == 'Ascend': - ms.set_context(jit_config={"jit_level": "O0"}) -``` - -## 自实现的 torch_geometric 模块 - -本 PR 不依赖 torch_geometric,而是完整实现了核心的图神经网络模块(在 MindSpore 框架中)。以下是实现的内容对照表: - -### data_utils.py - 数据处理模块 - -| torch_geometric | 本实现 | 说明 | -|---|---|---| -| `torch_geometric.data.Data` | `Data` 类 | 图数据容器,支持任意属性 | -| 
`torch_geometric.data.InMemoryDataset` | `InMemoryDataset` 类 | 内存数据集基类,支持预处理和缓存 | -| `torch_geometric.data.Graph` | `Graph` 类 | 图数据结构,包含节点/边特征 | -| `torch_geometric.loader.DataLoader` | `DataLoader` 类 | 数据加载器,支持批处理和混洗 | -| `torch_geometric.utils.train_test_split` | `create_data_splits()` | 数据集划分为训练/验证/测试集 | - -**关键特性**: - -- ✅ 完全独立的实现,无外部依赖 -- ✅ 支持 MindSpore Tensor 和 NumPy 数组 -- ✅ 动态属性管理 -- ✅ 内存高效的批处理 - -### gnn_ops.py - GNN 操作模块 - -| torch_geometric | 本实现 | 说明 | -|---|---|---| -| `torch_geometric.nn.MessagePassing` | `MessagePassing` 类 | 消息传递基类,支持 add/mean/max 聚集 | -| `torch_geometric.nn.TAGConv` | `TAGConv` 类 | 拓扑自适应图卷积 | -| `torch_geometric.nn.GCNConv` | `GCNConv` 类 | 图卷积网络层 | -| `torch_geometric.utils.degree` | `degree()` 函数 | 计算节点度数 | -| `torch_geometric.utils.to_undirected` | `to_undirected()` 函数 | 转换为无向图 | - -**MessagePassing 特性**: - -- ✅ 聚集方法:add, mean, max -- ✅ 消息传递流程:message() → aggregate() → update() -- ✅ 自动参数过滤(使用 inspect 模块) -- ✅ CPU/NPU 兼容的 segment 操作 - -**TAGConv 实现亮点**: - -- ✅ k-hop 拓扑适应性聚集 -- ✅ 对称归一化 (D^-0.5 A D^-0.5) -- ✅ 完全匹配原论文和 torch_geometric 实现 -- ✅ 数值对齐测试通过 - -**GCNConv 特性**: - -- ✅ 标准图卷积 -- ✅ 对称归一化 -- ✅ 自环处理 - -**兼容操作**: - -- ✅ 所有操作都使用 MindSpore mint 和 ops 实现 -- ✅ 针对 Ascend 设备优化 -- ✅ 避免索引操作,使用矩阵乘法替代 - -## 关键技术点 - -### 1. TAGConv 算子 - -- MPN 中使用的 TAGConv (Topology Adaptive GNN Convolution) -- 自定义实现,支持 CPU/Ascend -- 数值对齐测试完成 - -### 2. Ascend 优化操作 - -提供了以下操作的 Ascend 优化实现(在 `cpu_npu_ops.py`): - -- `gather_cpu_npu_compatible()` - 张量聚集操作 -- `pow_cpu_npu_compatible()` - 张量幂次运算 -- `where_cpu_npu_compatible()` - 条件选择 -- `degree_cpu_npu_compatible()` - 图度数计算 -- `randint_like_cpu_npu_compatible()` - 随机整数生成 - -### 3. 损失函数 - -- **MaskedL2Loss** - 基础掩码 L2 损失,用于关键节点预测 -- **PowerImbalance** - 物理信息损失,基于功率不平衡 -- **MixedMSEPowerImbalance** - 混合损失,综合 MSE 和功率不平衡 - -### 4. 
数据处理 - -支持两种数据集格式的无缝切换: - -- **PowerFlowDataV2** - 100K 样本,4D 特征,推荐使用 -- **PowerFlowData** - 较小数据集,12D 特征,向后兼容 - -## 数据集格式 - -支持两种数据集格式: - -### V2 格式(推荐) - -- 100K 样本 -- 4D 节点特征:[voltage_mag, voltage_angle, Pd, Qd] -- 使用 `PowerFlowDataV2` 和 `PowerFlowDataLoaderV2` -- case 名称后缀: `v2`(如 `14v2`, `118v2`) - -### legacy格式 - -- 较小数据集 -- 12D 节点特征 -- 使用 `PowerFlowData` 和 `PowerFlowDataLoader` -- case 名称无后缀(如 `14`, `118`) - -## 代码质量和兼容性 - -### ✅ 代码规范 - -- 所有文件都有 Apache License 2.0 copyright 头 -- 遵循 Python PEP 8 风格指南 -- 完整的文档字符串和类型注解 - -### ✅ 数值验证 - -- 所有模型在 CPU和Ascend 上数值一致 -- 梯度计算通过对数检验(Gradient Checker) -- TAGConv 操作完整对齐测试 - -### ✅ 设备兼容性 - -- **Ascend** - 华为云完全支持(含特殊优化) - -### ✅ MindSpore 版本 - -- 最低版本:2.0.0 -- 推荐版本:2.7.0+ -- 向后兼容性:支持大部分 2.x 版本 - -## 已知问题和解决方案 - -### Ascend 首个 epoch 耗时长 - -**问题**: 第一个 epoch 可能需要 15-30 秒 -**原因**: Ascend 需要编译计算图 -**解决**: 这是正常行为,后续 epoch 会快速完成 - -### PYTHONPATH 警告 - -```text -Can not find the tbe operator implementation -``` - -**问题**: TBE (Tensor Boost Engine) 路径配置 -**影响**: 无,MindSpore 使用其他算子实现 -**处理**: 可以安全忽略 - -## 快速开始 - -### 安装依赖 - -```bash -# MindSpore (CPU/GPU) -pip install mindspore - -# MindSpore Ascend (在华为云环境) -# 按照官方文档安装:https://www.mindspore.cn/install - -# 其他依赖 -pip install numpy torch-geometric h5py matplotlib -``` - -### 导入使用 - -```python -from powerflownet.src import MLPNet, MPN, GCNNet -from powerflownet.src import PowerFlowDataV2, PowerFlowDataLoaderV2 -from powerflownet.configs import init_device - -# 初始化设备 -init_device('Ascend') # 或 'CPU', 'GPU' - -# 创建模型 -model = MPN( - nfeature_dim=4, - efeature_dim=2, - output_dim=4, - hidden_dim=64, - n_gnn_layers=3, - k=3 -) - -# 加载数据 -dataset = PowerFlowDataV2(root='./data', case='14v2', split=[0.7, 0.15, 0.15], task='train') -dataloader = PowerFlowDataLoaderV2(dataset, batch_size=256, shuffle=True) - -# 训练 -import mindspore as ms -from mindspore import nn - -optimizer = nn.Adam(model.trainable_params(), learning_rate=0.001) -loss_fn = MaskedL2Loss(regularize=False) - -def 
forward_fn(batch): - pred = model(batch) - loss = loss_fn(pred, batch.y, batch.pred_mask) - return loss - -grad_fn = ms.value_and_grad(forward_fn, None, model.trainable_params()) - -for batch in dataloader: - loss, grads = grad_fn(batch) - optimizer(grads) -``` - -### 完整训练示例 - -详见项目中的 `train.py` 脚本(不包含在本 PR 中)。 - -## 支持的模型列表 - -本 PR 包含以下 **9 个 GNN 模型**的完整 MindSpore 实现: - -### 基础模型 - -1. **MLPNet** - 多层感知机,用作基准模型 -2. **GCNNet** - 图卷积网络 - -### 消息传递网络族(MPN) - -3. **MPN** - 基础消息传递网络(带 TAGConv) -4. **SkipMPN** - 带跳跃连接的 MPN -5. **MaskEmbdMPN** - 带掩码嵌入的 MPN -6. **MultiMPN** - 多头 MPN -7. **MaskEmbdMultiMPN** - 带掩码嵌入的多头 MPN -8. **MaskEmbdMultiMPNNoMP** - 无消息传递的多头 MPN -9. **MPNSimplenet** - 简化版 MPN - -### 高级模型 - -10. **MultiConvNet** - 多卷积网络 -11. **WrappedMultiConv** - 包装的多卷积网络 - -### 推荐使用 - -- **数据格式:4D 特征** → mlp, mpn, gcn, mask_embd_multi_mpn, mpn_simplenet -- **数据格式:12D 特征** → skip_mpn, mask_embd_mpn, multi_mpn, mask_embd_multi_mpn_nomp, multi_conv_net - -## API 参考 - -### 创建模型 - -```python -# MLP -from powerflownet.src import MLPNet -model = MLPNet(nfeature_dim=4, output_dim=4, hidden_dim=64, n_layers=3) - -# MPN 系列 -from powerflownet.src import MPN, SkipMPN, MaskEmbdMultiMPN -model = MPN(nfeature_dim=4, efeature_dim=2, output_dim=4, hidden_dim=64, n_gnn_layers=3, k=3) -model = SkipMPN(nfeature_dim=4, efeature_dim=2, output_dim=4, hidden_dim=64, n_gnn_layers=3, k=3) -model = MaskEmbdMultiMPN(nfeature_dim=4, efeature_dim=2, output_dim=4, hidden_dim=64, n_gnn_layers=3, k=3) - -# GCN -from powerflownet.src import GCNNet -model = GCNNet(nfeature_dim=4, output_dim=4, hidden_dim=64, n_gnn_layers=3) -``` - -### 数据加载 - -```python -# V2 格式(推荐) -from powerflownet.src import PowerFlowDataV2, PowerFlowDataLoaderV2 -dataset = PowerFlowDataV2(root='./data', case='14v2', split=[0.7, 0.15, 0.15], task='train') -loader = PowerFlowDataLoaderV2(dataset, batch_size=256, shuffle=True) - -# 遗留格式 -from powerflownet.src import PowerFlowData, PowerFlowDataLoader -dataset = 
PowerFlowData(root='./data', case='14', split=[0.7, 0.15, 0.15], task='train') -loader = PowerFlowDataLoader(dataset, batch_size=256, shuffle=True) -``` - -### 设备初始化 - -```python -from powerflownet.configs import init_device, DeviceConfig - -# 初始化设备 -init_device('Ascend') # 'CPU', 'GPU', 'Ascend' - -# 获取设备配置 -config = DeviceConfig() -device = config.get_device_target() -``` - -## 常见问题 - -### Q: 如何在 Ascend 上运行? - -A: 需要在训练脚本最开始添加: - -```python -if 'RANK_TABLE_FILE' in os.environ: - del os.environ['RANK_TABLE_FILE'] -import mindspore as ms -ms.set_context(mode=ms.PYNATIVE_MODE) -``` - -### Q: 4D 和 12D 特征有什么区别? - -A: 4D 特征(推荐):[voltage_mag, voltage_angle, Pd, Qd] -12D 特征(遗留):[one-hot(4) + features(4) + mask(4)] - -### Q: 是否支持分布式训练? - -A: 本 PR 包含的代码不支持分布式训练。如需分布式,需要额外配置。 - -### Q: 如何扩展新的损失函数? - -A: 在 `custom_loss_functions.py` 中继承 `nn.Cell`,实现 `construct()` 方法。 - -## 贡献指南 - -本 PR 专注于模型和配置的核心迁移。如需: - -- 添加新模型 → 参考 `mpn.py` 的编写风格 -- 新损失函数 → 参考 `custom_loss_functions.py` -- 优化 Ascend 性能 → 参考 `cpu_npu_ops.py` - -请确保: - -1. 添加 Apache License 2.0 copyright 头 -2. 包含完整的文档字符串 -3. 在三个平台(CPU/GPU/Ascend)上测试数值 - -## 许可证 - -Apache License 2.0 - 详见每个源文件的 copyright 头部 - -## 相关资源 - -- **MindSpore 官方**: -- **原始 PyTorch 版本**: [PowerFlowNet GitHub](https://github.com/...) diff --git a/MindEnergy/applications/PowerFlowNet/infer.py b/MindEnergy/applications/PowerFlowNet/infer.py new file mode 100644 index 000000000..dbb250524 --- /dev/null +++ b/MindEnergy/applications/PowerFlowNet/infer.py @@ -0,0 +1,411 @@ +#!/usr/bin/env python3 + +# Copyright 2025 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# This file is a derivative work based on the original PowerFlowNet implementation +# (https://github.com/stavrosorf/poweflownet) which was licensed under the MIT License. +# Significant modifications have been made to adapt the code for the MindSpore framework, +# including MindSpore equivalents and +# optimization for Ascend hardware acceleration. +# ============================================================================ +""" +Evaluation Script for MindSpore PowerFlowNet + +Tests trained models and computes detailed loss metrics. + +Usage: + python test.py --model mlp --run_id --loss masked_l2 + python test.py --model mpn --run_id --loss masked_l2v2 + python test.py --model skip_mpn --run_id --loss masked_l1 +""" + +import traceback +import argparse +import sys +from pathlib import Path +from typing import Dict, Optional +import mindspore as ms +from mindspore import nn, context +from src import ( + MPN, SkipMPN, MaskEmbedMPN, MultiMPN, MaskEmbedMultiMPN, + MaskEmbedMultiMPNNoMP, MultiConvNet, MPNSimplenet, MLPNet, GCNNet +) +from src.power_flow_data import PowerFlowData, PowerFlowDataV2, PowerFlowDataLoader, PowerFlowDataLoaderV2 +from src.evaluation import num_params +from src.custom_loss_functions import ( + MaskedL2Loss, MaskedL2V2, MaskedL1, + PowerImbalance, MixedMSEPowerImbalance +) +from tqdm import tqdm + +# Set default context to PYNATIVE_MODE for better compatibility with complex operations +context.set_context(mode=context.PYNATIVE_MODE) +ms.set_device(device_target='CPU') + +# Add utils to path +sys.path.insert(0, 
def evaluate_simple(model, loader, loss_fn):
    """Run one evaluation pass and return the batch-averaged loss.

    Mirrors the training-time loss bookkeeping: each batch contributes its
    scalar loss once, and the result is the mean over batches (not samples).
    """
    model.set_train(False)
    running_sum = 0.0
    seen = 0

    progress = tqdm(loader, desc='Evaluating')
    for graph_batch in progress:
        prediction = model(graph_batch)
        batch_loss = loss_fn(prediction, graph_batch.y, graph_batch.pred_mask)
        seen += 1
        running_sum += float(batch_loss.asnumpy())
        progress.set_postfix({'loss': f'{running_sum / seen:.6f}'})

    # Guard against an empty loader so we never divide by zero.
    return running_sum / max(1, seen)


def _get_gnn_model_params(config: Dict) -> Dict:
    """Keyword arguments shared by every message-passing / GNN model."""
    get = config.get
    return dict(
        nfeature_dim=config['nfeature_dim'],
        efeature_dim=get('efeature_dim', 2),
        output_dim=config['output_dim'],
        hidden_dim=config['hidden_dim'],
        n_gnn_layers=config['n_layers'],
        k=get('k', 3),
        dropout_rate=get('dropout_rate', 0.0),
    )


def _get_mlp_params(config: Dict) -> Dict:
    """Keyword arguments for the MLP baseline model."""
    return dict(
        nfeature_dim=config['nfeature_dim'],
        output_dim=config['output_dim'],
        hidden_dim=config['hidden_dim'],
        n_layers=config['n_layers'],
        dropout_rate=config.get('dropout_rate', 0.0),
    )


def _get_gcn_params(config: Dict) -> Dict:
    """Keyword arguments for the GCN model (uses ``n_gnn_layers``)."""
    return dict(
        nfeature_dim=config['nfeature_dim'],
        output_dim=config['output_dim'],
        hidden_dim=config['hidden_dim'],
        n_gnn_layers=config['n_layers'],
        dropout_rate=config.get('dropout_rate', 0.0),
    )


def create_model(model_name: str, config: Dict) -> nn.Cell:
    """
    Create a model instance by name.

    Args:
        model_name: Model type identifier (case-insensitive).
        config: Configuration dictionary with model hyper-parameters.

    Returns:
        Instantiated MindSpore model.

    Raises:
        ValueError: If ``model_name`` does not match a known architecture.
    """
    key = model_name.lower()

    # MLP and GCN use their own parameter subsets.
    if key == 'mlp':
        return MLPNet(**_get_mlp_params(config))
    if key == 'gcn':
        return GCNNet(**_get_gcn_params(config))

    # All remaining architectures share the GNN parameter set; extract it
    # first so that config errors surface regardless of the model name.
    gnn_kwargs = _get_gnn_model_params(config)

    registry = {
        'mpn': MPN,
        'skip_mpn': SkipMPN,
        'mask_embed_mpn': MaskEmbedMPN,
        'multi_mpn': MultiMPN,
        'mask_embed_multi_mpn': MaskEmbedMultiMPN,
        'mask_embed_multi_mpn_nomp': MaskEmbedMultiMPNNoMP,
        'multi_conv_net': MultiConvNet,
        'mpn_simplenet': MPNSimplenet,
    }

    if key in registry:
        return registry[key](**gnn_kwargs)

    raise ValueError(f"Unknown model: {model_name}")
+ + Args: + loss_name: Loss function identifier + config: Optional configuration for loss function + + Returns: + Instantiated loss function + """ + loss_name = loss_name.lower() + + if loss_name == 'masked_l2': + return MaskedL2Loss() + + if loss_name == 'masked_l2v2': + return MaskedL2V2() + + if loss_name == 'masked_l1': + return MaskedL1() + + if loss_name == 'power_imbalance': + if config is None: + raise ValueError("PowerImbalance requires config with normalization parameters") + return PowerImbalance( + xymean=config.get('xymean'), + xystd=config.get('xystd'), + edgemean=config.get('edgemean'), + edgestd=config.get('edgestd') + ) + + if loss_name == 'mixed_mse_power_imbalance': + if config is None: + raise ValueError("MixedMSEPowerImbalance requires config with normalization parameters") + return MixedMSEPowerImbalance( + xymean=config.get('xymean'), + xystd=config.get('xystd'), + edgemean=config.get('edgemean'), + edgestd=config.get('edgestd'), + alpha=config.get('alpha', 0.9) + ) + + raise ValueError(f"Unknown loss function: {loss_name}") + + +def _setup_argument_parser() -> argparse.ArgumentParser: + """Setup and return argument parser for test script.""" + parser = argparse.ArgumentParser( + description="Evaluate trained PowerFlowNet models", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + python test.py --model mpn --run_id exp_001 --loss masked_l2 + python test.py --model mask_embed_mpn --run_id exp_002 --loss masked_l2v2 + python test.py --model multi_mpn --run_id exp_003 --loss masked_l1 + """ + ) + + parser.add_argument('--model', type=str, default='mpn', + choices=['mlp', 'gcn', 'mpn', 'skip_mpn', 'mask_embed_mpn', + 'multi_mpn', 'mask_embed_multi_mpn', 'mask_embed_multi_mpn_nomp', + 'multi_conv_net', 'mpn_simplenet'], + help='Model architecture to evaluate') + parser.add_argument('--run_id', type=str, required=True, + help='Run ID of the saved model checkpoint') + parser.add_argument('--loss', type=str, 
default='masked_l2',
+                        choices=['masked_l2', 'masked_l2v2', 'masked_l1',
+                                 'power_imbalance', 'mixed_mse_power_imbalance'],
+                        help='Loss function for evaluation (default: masked_l2)')
+    parser.add_argument('--data_dir', type=str, default='data',
+                        help='Directory containing data files')
+    parser.add_argument('--case', type=str, default='14',
+                        help='Case name. Use v2 suffix (e.g., 14v2, 118v2) for V2 format, '
+                             'otherwise legacy format (e.g., 14, 118)')
+    parser.add_argument('--device', type=str, default='CPU',
+                        choices=['CPU', 'GPU', 'Ascend'],
+                        help='Computation device')
+    parser.add_argument('--batch_size', type=int, default=64,
+                        help='Batch size for evaluation')
+
+    return parser
+
+
+def _print_header(args) -> None:
+    """Print evaluation header."""
+    print(f"\n{'='*60}")
+    print("PowerFlowNet MindSpore Evaluation")
+    print(f"{'='*60}")
+    print(f"Model: {args.model}")
+    print(f"Run ID: {args.run_id}")
+    print(f"Case: {args.case}")
+    print(f"Loss: {args.loss}")
+    print(f"Device: {args.device}")
+    print(f"{'='*60}\n")
+
+
+def _load_datasets(args):
+    """Load train, val, test datasets with automatic format detection. 
+ + V2 format: case names ending with 'v2' (e.g., 14v2, 118v2) + Legacy format: case names without 'v2' (e.g., 14, 118) + """ + split = [0.7, 0.15, 0.15] + is_v2_format = args.case.endswith('v2') + + if is_v2_format: + print(f"Loading V2 dataset format for case '{args.case}'") + train_dataset = PowerFlowDataV2( + root=args.data_dir, case=args.case, split=split, + task='train', normalize=True) + xymean, xystd, edgemean, edgestd = train_dataset.get_normalization_stats() + + val_dataset = PowerFlowDataV2( + root=args.data_dir, case=args.case, split=split, + task='val', normalize=True, + xymean=xymean, xystd=xystd, edgemean=edgemean, edgestd=edgestd) + + test_dataset = PowerFlowDataV2( + root=args.data_dir, case=args.case, split=split, + task='test', normalize=True, + xymean=xymean, xystd=xystd, edgemean=edgemean, edgestd=edgestd) + + train_loader = PowerFlowDataLoaderV2(train_dataset, batch_size=args.batch_size, shuffle=False) + val_loader = PowerFlowDataLoaderV2(val_dataset, batch_size=args.batch_size, shuffle=False) + test_loader = PowerFlowDataLoaderV2(test_dataset, batch_size=args.batch_size, shuffle=False) + else: + print(f"Loading legacy dataset format for case '{args.case}'") + train_dataset = PowerFlowData( + root=args.data_dir, case=args.case, split=split, + task='train', normalize=True) + xymean, xystd, edgemean, edgestd = train_dataset.get_data_means_stds() + + val_dataset = PowerFlowData( + root=args.data_dir, case=args.case, split=split, + task='val', normalize=True, + xymean=xymean, xystd=xystd, edgemean=edgemean, edgestd=edgestd) + + test_dataset = PowerFlowData( + root=args.data_dir, case=args.case, split=split, + task='test', normalize=True, + xymean=xymean, xystd=xystd, edgemean=edgemean, edgestd=edgestd) + + train_loader = PowerFlowDataLoader(train_dataset, batch_size=args.batch_size, shuffle=False) + val_loader = PowerFlowDataLoader(val_dataset, batch_size=args.batch_size, shuffle=False) + test_loader = PowerFlowDataLoader(test_dataset, 
batch_size=args.batch_size, shuffle=False)
+
+    print(f"✓ Train: {len(train_dataset)}, Val: {len(val_dataset)}, Test: {len(test_dataset)}")
+
+    return train_loader, val_loader, test_loader, xymean, xystd, edgemean, edgestd
+
+
+def _load_model_checkpoint(args, model):
+    """Load model checkpoint from disk."""
+    model_dir = Path('models') / args.case / f"{args.model}_{args.run_id}"
+
+    if not model_dir.exists():
+        print(f"Error: Model directory not found: {model_dir}")
+        case_dir = Path('models') / args.case
+        if case_dir.exists():
+            print(f"Available directories in models/{args.case}/:")
+            for d in sorted(case_dir.iterdir()):
+                print(f"  {d.name}")
+        return False
+
+    checkpoints = sorted(model_dir.glob('model_epoch_*.ckpt'), key=lambda p: int(p.stem.rsplit('_', 1)[-1]))
+    if not checkpoints:
+        print(f"Error: No checkpoints found in {model_dir}")
+        return False
+
+    checkpoint_path = checkpoints[-1]
+    print(f"Loading checkpoint: {checkpoint_path}")
+
+    try:
+        param_dict = ms.load_checkpoint(str(checkpoint_path))
+        ms.load_param_into_net(model, param_dict)
+        return True
+    except Exception as e:
+        print(f"Error loading model: {e}")
+        traceback.print_exc()
+        return False
+
+
+def _print_evaluation_results(train_loss, val_loss, test_loss) -> None:
+    """Print evaluation results summary."""
+    print(f"\n{'='*60}")
+    print("Evaluation Summary:")
+    print(f"{'='*60}")
+    print(f"  Train Loss: {train_loss:.6f}")
+    print(f"  Val Loss:   {val_loss:.6f}")
+    print(f"  Test Loss:  {test_loss:.6f}")
+    print(f"{'='*60}\n")
+
+
+def main():
+    """Main evaluation function."""
+    parser = _setup_argument_parser()
+    args = parser.parse_args()
+
+    context.set_context(mode=context.PYNATIVE_MODE)
+    ms.set_device(device_target=args.device)
+
+    _print_header(args)
+
+    # Load dataset
+    print("Loading dataset...")
+    try:
+        train_loader, val_loader, test_loader, xymean, xystd, edgemean, edgestd = _load_datasets(args)
+    except Exception as e:
+        print(f"Error loading dataset: {e}")
+        traceback.print_exc()
+        print(f"Make sure data exists at 
{args.data_dir}/mindspore/raw/case{args.case}_*.npy") + return + + # Create and load model + config = { + 'nfeature_dim': 4, 'efeature_dim': 2, 'output_dim': 4, + 'hidden_dim': 64, 'n_layers': 3, 'k': 3, 'dropout_rate': 0.0, + } + + print("Creating model...") + model = create_model(args.model, config) + + if not _load_model_checkpoint(args, model): + return + + print(f"Model parameters: {num_params(model):,}\n") + + # Load loss function + loss_config = { + 'xymean': xymean, 'xystd': xystd, + 'edgemean': edgemean, 'edgestd': edgestd, + } + loss_fn = load_loss_function(args.loss, loss_config) + + # Evaluate + print("Evaluating on Training Set:") + print("-" * 60) + train_loss = evaluate_simple(model, train_loader, loss_fn) + print(f" Loss: {train_loss:.6f}") + + print("\nEvaluating on Validation Set:") + print("-" * 60) + val_loss = evaluate_simple(model, val_loader, loss_fn) + print(f" Loss: {val_loss:.6f}") + + print("\nEvaluating on Test Set:") + print("-" * 60) + test_loss = evaluate_simple(model, test_loader, loss_fn) + print(f" Loss: {test_loss:.6f}") + + _print_evaluation_results(train_loss, val_loss, test_loss) + + +if __name__ == '__main__': + main() diff --git a/MindEnergy/applications/PowerFlowNet/notebook_mindspore.ipynb b/MindEnergy/applications/PowerFlowNet/notebook_mindspore.ipynb new file mode 100644 index 000000000..bc3016f57 --- /dev/null +++ b/MindEnergy/applications/PowerFlowNet/notebook_mindspore.ipynb @@ -0,0 +1,547 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "7b026e5f", + "metadata": {}, + "source": [ + "# MindSpore PowerFlowNet 训练\n", + "\n", + "使用 `train.py` 训练不同网络结构,记录结果。" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "b0d24d40", + "metadata": {}, + "outputs": [], + "source": [ + "!unset RANK_TABLE_FILE" + ] + }, + { + "cell_type": "markdown", + "id": "921fad0f", + "metadata": {}, + "source": [ + "## 1. 
MLP 模型训练" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "2fd02162", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[WARNING] ME(54174:281473670340832,MainProcess):2025-11-29-19:55:26.869.000 [mindspore/run_check/_check_version.py:409] Can not find the tbe operator implementation(need by mindspore-ascend). Please check whether the Environment Variable PYTHONPATH is set. For details, refer to the installation guidelines: https://www.mindspore.cn/install\n", + "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.11/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for type is zero.\n", + " setattr(self, word, getattr(machar, word).flat[0])\n", + "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.11/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for type is zero.\n", + " return self._float_to_str(self.smallest_subnormal)\n", + "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.11/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for type is zero.\n", + " setattr(self, word, getattr(machar, word).flat[0])\n", + "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.11/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for type is zero.\n", + " return self._float_to_str(self.smallest_subnormal)\n", + "[WARNING] ME(54174:281473670340832,MainProcess):2025-11-29-19:55:37.735.000 [mindspore/run_check/_check_version.py:409] Can not find the tbe operator implementation(need by mindspore-ascend). Please check whether the Environment Variable PYTHONPATH is set. 
For details, refer to the installation guidelines: https://www.mindspore.cn/install\n", + "Device set to: Ascend (ID: 0)\n", + "✓ JIT level set to O0 for Ascend compatibility\n", + "\n", + "📊 Loading datasets...\n", + "Using legacy dataset format for case '14'\n", + "Loading pre-processed data from ./data/mindspore/processed/case14_split70_15_15_train.h5\n", + "[WARNING] DEVICE(54174,ffffb22260e0,python):2025-11-29-19:55:37.974.139 [mindspore/ccsrc/plugin/res_manager/ascend/mem_manager/ascend_vmm_adapter.h:152] CheckVmmDriverVersion] Open file /etc/ascend_install.info failed.\n", + "[WARNING] DEVICE(54174,ffffb22260e0,python):2025-11-29-19:55:37.975.028 [mindspore/ccsrc/plugin/res_manager/ascend/mem_manager/ascend_vmm_adapter.h:191] CheckVmmDriverVersion] Driver version is less than 24.0.0, vmm is disabled by default, drvier_version: 23.0.6\n", + "✓ Normalization stats computed from training set\n", + "xymean, xystd assigned.\n", + "edgemean, edgestd assigned.\n", + "Loading pre-processed data from ./data/mindspore/processed/case14_split70_15_15_val.h5\n", + "xymean, xystd assigned.\n", + "edgemean, edgestd assigned.\n", + "Loading pre-processed data from ./data/mindspore/processed/case14_split70_15_15_test.h5\n", + "✓ Train: 70, Val: 15, Test: 15\n", + "\n", + "🧠 Creating MLP model...\n", + "✓ Model has 4,740 trainable parameters\n", + "✓ Config saved to logs/14/mlp_20251129-195544/config.json\n", + "\n", + "============================================================\n", + "Training MLP on 14\n", + "============================================================\n", + "Epoch 1 | Train Loss: 1.006420 | Val Loss: 0.982756 | 16.21s\n", + "✓ Model saved to models/14/mlp_20251129-195544/model_epoch_1.ckpt\n", + "Epoch 2 | Train Loss: 0.998078 | Val Loss: 0.979875 | 0.06s\n", + "✓ Model saved to models/14/mlp_20251129-195544/model_epoch_2.ckpt\n", + "Epoch 3 | Train Loss: 0.987216 | Val Loss: 0.979282 | 0.05s\n", + "✓ Model saved to 
models/14/mlp_20251129-195544/model_epoch_3.ckpt\n", + "Epoch 4 | Train Loss: 0.985248 | Val Loss: 0.980005 | 0.04s\n", + "Epoch 5 | Train Loss: 0.986942 | Val Loss: 0.980786 | 0.04s\n", + "Epoch 6 | Train Loss: 0.983064 | Val Loss: 0.981203 | 0.05s\n", + "Epoch 7 | Train Loss: 0.980125 | Val Loss: 0.980996 | 0.05s\n", + "Epoch 8 | Train Loss: 0.984436 | Val Loss: 0.980053 | 0.05s\n", + "Epoch 9 | Train Loss: 0.986425 | Val Loss: 0.978666 | 0.04s\n", + "✓ Model saved to models/14/mlp_20251129-195544/model_epoch_9.ckpt\n", + "Epoch 10 | Train Loss: 0.979563 | Val Loss: 0.977012 | 0.04s\n", + "✓ Model saved to models/14/mlp_20251129-195544/model_epoch_10.ckpt\n", + "Epoch 11 | Train Loss: 0.977033 | Val Loss: 0.975111 | 0.04s\n", + "✓ Model saved to models/14/mlp_20251129-195544/model_epoch_11.ckpt\n", + "Epoch 12 | Train Loss: 0.974924 | Val Loss: 0.973268 | 0.04s\n", + "✓ Model saved to models/14/mlp_20251129-195544/model_epoch_12.ckpt\n", + "Epoch 13 | Train Loss: 0.978134 | Val Loss: 0.971639 | 0.04s\n", + "✓ Model saved to models/14/mlp_20251129-195544/model_epoch_13.ckpt\n", + "Epoch 14 | Train Loss: 0.977845 | Val Loss: 0.970252 | 0.05s\n", + "✓ Model saved to models/14/mlp_20251129-195544/model_epoch_14.ckpt\n", + "Epoch 15 | Train Loss: 0.977834 | Val Loss: 0.969279 | 0.04s\n", + "✓ Model saved to models/14/mlp_20251129-195544/model_epoch_15.ckpt\n", + "Epoch 16 | Train Loss: 0.977289 | Val Loss: 0.968568 | 0.04s\n", + "✓ Model saved to models/14/mlp_20251129-195544/model_epoch_16.ckpt\n", + "Epoch 17 | Train Loss: 0.975304 | Val Loss: 0.968137 | 0.05s\n", + "✓ Model saved to models/14/mlp_20251129-195544/model_epoch_17.ckpt\n", + "Epoch 18 | Train Loss: 0.973641 | Val Loss: 0.967840 | 0.04s\n", + "✓ Model saved to models/14/mlp_20251129-195544/model_epoch_18.ckpt\n", + "Epoch 19 | Train Loss: 0.978667 | Val Loss: 0.967651 | 0.05s\n", + "✓ Model saved to models/14/mlp_20251129-195544/model_epoch_19.ckpt\n", + "Epoch 20 | Train Loss: 0.974753 | Val Loss: 
0.967581 | 0.04s\n", + "✓ Model saved to models/14/mlp_20251129-195544/model_epoch_20.ckpt\n", + "\n", + "============================================================\n", + "Test Loss: 0.998777\n", + "============================================================\n", + "\n", + "✓ History saved to logs/14/mlp_20251129-195544/history.json\n", + "✓ Plot saved to logs/14/mlp_20251129-195544/training_curve.png\n", + "✅ Training completed!\n" + ] + } + ], + "source": [ + "!python train.py --model mlp --epochs 20 --device Ascend" + ] + }, + { + "cell_type": "markdown", + "id": "77a68b32", + "metadata": {}, + "source": [ + "## 2. MPN 模型训练" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c58ecff6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[WARNING] ME(56643:281473175081184,MainProcess):2025-11-29-19:56:12.117.000 [mindspore/run_check/_check_version.py:409] Can not find the tbe operator implementation(need by mindspore-ascend). Please check whether the Environment Variable PYTHONPATH is set. 
For details, refer to the installation guidelines: https://www.mindspore.cn/install\n", + "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.11/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for type is zero.\n", + " setattr(self, word, getattr(machar, word).flat[0])\n", + "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.11/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for type is zero.\n", + " return self._float_to_str(self.smallest_subnormal)\n", + "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.11/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for type is zero.\n", + " setattr(self, word, getattr(machar, word).flat[0])\n", + "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.11/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for type is zero.\n", + " return self._float_to_str(self.smallest_subnormal)\n", + "[WARNING] ME(56643:281473175081184,MainProcess):2025-11-29-19:56:22.840.000 [mindspore/run_check/_check_version.py:409] Can not find the tbe operator implementation(need by mindspore-ascend). Please check whether the Environment Variable PYTHONPATH is set. 
For details, refer to the installation guidelines: https://www.mindspore.cn/install\n", + "Device set to: Ascend (ID: 0)\n", + "✓ JIT level set to O0 for Ascend compatibility\n", + "\n", + "📊 Loading datasets...\n", + "Using legacy dataset format for case '14'\n", + "Loading pre-processed data from ./data/mindspore/processed/case14_split70_15_15_train.h5\n", + "[WARNING] DEVICE(56643,ffff949d50e0,python):2025-11-29-19:56:23.051.589 [mindspore/ccsrc/plugin/res_manager/ascend/mem_manager/ascend_vmm_adapter.h:152] CheckVmmDriverVersion] Open file /etc/ascend_install.info failed.\n", + "[WARNING] DEVICE(56643,ffff949d50e0,python):2025-11-29-19:56:23.051.705 [mindspore/ccsrc/plugin/res_manager/ascend/mem_manager/ascend_vmm_adapter.h:191] CheckVmmDriverVersion] Driver version is less than 24.0.0, vmm is disabled by default, drvier_version: 23.0.6\n", + "✓ Normalization stats computed from training set\n", + "xymean, xystd assigned.\n", + "edgemean, edgestd assigned.\n", + "Loading pre-processed data from ./data/mindspore/processed/case14_split70_15_15_val.h5\n", + "xymean, xystd assigned.\n", + "edgemean, edgestd assigned.\n", + "Loading pre-processed data from ./data/mindspore/processed/case14_split70_15_15_test.h5\n", + "✓ Train: 70, Val: 15, Test: 15\n", + "\n", + "🧠 Creating MPN model...\n", + "✓ Model has 38,788 trainable parameters\n", + "✓ Config saved to logs/14/mpn_20251129-195637/config.json\n", + "\n", + "============================================================\n", + "Training MPN on 14\n", + "============================================================\n", + "..Epoch 1 | Train Loss: 1.637114 | Val Loss: 1.022665 | 41.42s\n", + "✓ Model saved to models/14/mpn_20251129-195637/model_epoch_1.ckpt\n", + "Epoch 2 | Train Loss: 1.027335 | Val Loss: 1.079274 | 32.54s\n", + "Epoch 3 | Train Loss: 1.106212 | Val Loss: 1.158698 | 34.15s\n", + "Epoch 4 | Train Loss: 0.996678 | Val Loss: 1.199942 | 20.29s\n", + "Epoch 5 | Train Loss: 0.999584 | Val Loss: 1.212999 | 
18.97s\n", + "Epoch 6 | Train Loss: 1.217058 | Val Loss: 1.132852 | 32.08s\n", + "Epoch 7 | Train Loss: 1.142278 | Val Loss: 1.052381 | 31.32s\n", + "Epoch 8 | Train Loss: 0.983734 | Val Loss: 1.013706 | 18.93s\n", + "✓ Model saved to models/14/mpn_20251129-195637/model_epoch_8.ckpt\n", + "Epoch 9 | Train Loss: 1.027235 | Val Loss: 0.995602 | 32.07s\n", + "✓ Model saved to models/14/mpn_20251129-195637/model_epoch_9.ckpt\n", + "Epoch 10 | Train Loss: 1.006690 | Val Loss: 0.990448 | 32.74s\n", + "✓ Model saved to models/14/mpn_20251129-195637/model_epoch_10.ckpt\n", + "Epoch 11 | Train Loss: 0.988391 | Val Loss: 0.989474 | 33.68s\n", + "✓ Model saved to models/14/mpn_20251129-195637/model_epoch_11.ckpt\n", + "Epoch 12 | Train Loss: 0.985416 | Val Loss: 0.989226 | 32.00s\n", + "✓ Model saved to models/14/mpn_20251129-195637/model_epoch_12.ckpt\n", + "Epoch 13 | Train Loss: 0.990246 | Val Loss: 0.990439 | 32.71s\n", + "Epoch 14 | Train Loss: 0.986866 | Val Loss: 0.991505 | 33.84s\n", + "Epoch 15 | Train Loss: 0.981408 | Val Loss: 0.993201 | 33.07s\n", + "Epoch 16 | Train Loss: 0.984430 | Val Loss: 0.994146 | 33.31s\n", + "Epoch 17 | Train Loss: 0.980919 | Val Loss: 0.992938 | 33.73s\n", + "Epoch 18 | Train Loss: 0.980055 | Val Loss: 0.989699 | 33.32s\n", + "Epoch 19 | Train Loss: 0.975675 | Val Loss: 0.985385 | 33.52s\n", + "✓ Model saved to models/14/mpn_20251129-195637/model_epoch_19.ckpt\n", + "Epoch 20 | Train Loss: 0.974188 | Val Loss: 0.981675 | 35.22s\n", + "✓ Model saved to models/14/mpn_20251129-195637/model_epoch_20.ckpt\n", + "\n", + "============================================================\n", + "Test Loss: 0.998121\n", + "============================================================\n", + "\n", + "✓ History saved to logs/14/mpn_20251129-195637/history.json\n", + "✓ Plot saved to logs/14/mpn_20251129-195637/training_curve.png\n", + "✅ Training completed!\n" + ] + } + ], + "source": [ + "!python train.py --model mpn --epochs 20 --device Ascend" + ] + }, 
+ { + "cell_type": "markdown", + "id": "8b6c2510", + "metadata": {}, + "source": [ + "## 3. GCN 模型训练" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "b3a3c0b2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[WARNING] ME(69742:281473275691232,MainProcess):2025-11-29-20:07:23.243.000 [mindspore/run_check/_check_version.py:409] Can not find the tbe operator implementation(need by mindspore-ascend). Please check whether the Environment Variable PYTHONPATH is set. For details, refer to the installation guidelines: https://www.mindspore.cn/install\n", + "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.11/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for type is zero.\n", + " setattr(self, word, getattr(machar, word).flat[0])\n", + "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.11/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for type is zero.\n", + " return self._float_to_str(self.smallest_subnormal)\n", + "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.11/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for type is zero.\n", + " setattr(self, word, getattr(machar, word).flat[0])\n", + "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.11/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for type is zero.\n", + " return self._float_to_str(self.smallest_subnormal)\n", + "[WARNING] ME(69742:281473275691232,MainProcess):2025-11-29-20:07:35.990.00 [mindspore/run_check/_check_version.py:409] Can not find the tbe operator implementation(need by mindspore-ascend). Please check whether the Environment Variable PYTHONPATH is set. 
For details, refer to the installation guidelines: https://www.mindspore.cn/install\n", + "Device set to: Ascend (ID: 0)\n", + "✓ JIT level set to O0 for Ascend compatibility\n", + "\n", + "📊 Loading datasets...\n", + "Using legacy dataset format for case '14'\n", + "Loading pre-processed data from ./data/mindspore/processed/case14_split70_15_15_train.h5\n", + "[WARNING] DEVICE(69742,ffff9a9c80e0,python):2025-11-29-20:07:35.308.838 [mindspore/ccsrc/plugin/res_manager/ascend/mem_manager/ascend_vmm_adapter.h:152] CheckVmmDriverVersion] Open file /etc/ascend_install.info failed.\n", + "[WARNING] DEVICE(69742,ffff9a9c80e0,python):2025-11-29-20:07:35.308.962 [mindspore/ccsrc/plugin/res_manager/ascend/mem_manager/ascend_vmm_adapter.h:191] CheckVmmDriverVersion] Driver version is less than 24.0.0, vmm is disabled by default, drvier_version: 23.0.6\n", + "✓ Normalization stats computed from training set\n", + "xymean, xystd assigned.\n", + "edgemean, edgestd assigned.\n", + "Loading pre-processed data from ./data/mindspore/processed/case14_split70_15_15_val.h5\n", + "xymean, xystd assigned.\n", + "edgemean, edgestd assigned.\n", + "Loading pre-processed data from ./data/mindspore/processed/case14_split70_15_15_test.h5\n", + "✓ Train: 70, Val: 15, Test: 15\n", + "\n", + "🧠 Creating GCN model...\n", + "✓ Model has 8,900 trainable parameters\n", + "✓ Config saved to logs/14/gcn_20251129-200741/config.json\n", + "\n", + "============================================================\n", + "Training GCN on 14\n", + "============================================================\n", + ".Epoch 1 | Train Loss: 0.990125 | Val Loss: 0.971524 | 24.67s\n", + "✓ Model saved to models/14/gcn_20251129-200741/model_epoch_1.ckpt\n", + "Epoch 2 | Train Loss: 0.989173 | Val Loss: 0.971101 | 6.62s\n", + "✓ Model saved to models/14/gcn_20251129-200741/model_epoch_2.ckpt\n", + "Epoch 3 | Train Loss: 0.988392 | Val Loss: 0.970686 | 6.55s\n", + "✓ Model saved to 
models/14/gcn_20251129-200741/model_epoch_3.ckpt\n", + "Epoch 4 | Train Loss: 0.987715 | Val Loss: 0.970321 | 6.25s\n", + "✓ Model saved to models/14/gcn_20251129-200741/model_epoch_4.ckpt\n", + "Epoch 5 | Train Loss: 0.986790 | Val Loss: 0.969973 | 6.92s\n", + "✓ Model saved to models/14/gcn_20251129-200741/model_epoch_5.ckpt\n", + "Epoch 6 | Train Loss: 0.986058 | Val Loss: 0.969619 | 6.56s\n", + "✓ Model saved to models/14/gcn_20251129-200741/model_epoch_6.ckpt\n", + "Epoch 7 | Train Loss: 0.984963 | Val Loss: 0.969283 | 6.64s\n", + "✓ Model saved to models/14/gcn_20251129-200741/model_epoch_7.ckpt\n", + "Epoch 8 | Train Loss: 0.984463 | Val Loss: 0.969027 | 6.39s\n", + "✓ Model saved to models/14/gcn_20251129-200741/model_epoch_8.ckpt\n", + "Epoch 9 | Train Loss: 0.983615 | Val Loss: 0.968827 | 6.35s\n", + "✓ Model saved to models/14/gcn_20251129-200741/model_epoch_9.ckpt\n", + "Epoch 10 | Train Loss: 0.983725 | Val Loss: 0.968740 | 6.44s\n", + "✓ Model saved to models/14/gcn_20251129-200741/model_epoch_10.ckpt\n", + "Epoch 11 | Train Loss: 0.982996 | Val Loss: 0.968740 | 6.52s\n", + "✓ Model saved to models/14/gcn_20251129-200741/model_epoch_11.ckpt\n", + "Epoch 12 | Train Loss: 0.981613 | Val Loss: 0.968814 | 6.32s\n", + "Epoch 13 | Train Loss: 0.981590 | Val Loss: 0.969004 | 6.05s\n", + "Epoch 14 | Train Loss: 0.980937 | Val Loss: 0.969314 | 6.12s\n", + "Epoch 15 | Train Loss: 0.980815 | Val Loss: 0.969732 | 6.13s\n", + "Epoch 16 | Train Loss: 0.980730 | Val Loss: 0.970226 | 6.16s\n", + "Epoch 17 | Train Loss: 0.979805 | Val Loss: 0.970772 | 6.12s\n", + "Epoch 18 | Train Loss: 0.980099 | Val Loss: 0.971374 | 6.08s\n", + "Epoch 19 | Train Loss: 0.979083 | Val Loss: 0.971996 | 6.12s\n", + "Epoch 20 | Train Loss: 0.979665 | Val Loss: 0.972538 | 6.20s\n", + "\n", + "============================================================\n", + "Test Loss: 0.989027\n", + "============================================================\n", + "\n", + "✓ History saved to 
logs/14/gcn_20251129-200741/history.json\n", + "✓ Plot saved to logs/14/gcn_20251129-200741/training_curve.png\n", + "✅ Training completed!\n" + ] + } + ], + "source": [ + "!python train.py --model gcn --epochs 20 --device Ascend" + ] + }, + { + "cell_type": "markdown", + "id": "273f8bc7", + "metadata": {}, + "source": [ + "### 4. mask_embed_multi_mpn 模型训练" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3f7c3ff7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[WARNING] ME(74408:281473765286112,MainProcess):2025-11-29-20:10:19.260.000 [mindspore/run_check/_check_version.py:409] Can not find the tbe operator implementation(need by mindspore-ascend). Please check whether the Environment Variable PYTHONPATH is set. For details, refer to the installation guidelines: https://www.mindspore.cn/install\n", + "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.11/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for type is zero.\n", + " setattr(self, word, getattr(machar, word).flat[0])\n", + "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.11/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for type is zero.\n", + " return self._float_to_str(self.smallest_subnormal)\n", + "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.11/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for type is zero.\n", + " setattr(self, word, getattr(machar, word).flat[0])\n", + "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.11/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for type is zero.\n", + " return self._float_to_str(self.smallest_subnormal)\n", + "[WARNING] ME(74408:281473765286112,MainProcess):2025-11-29-20:10:30.982.000 [mindspore/run_check/_check_version.py:409] Can not find the tbe operator implementation(need by mindspore-ascend). 
Please check whether the Environment Variable PYTHONPATH is set. For details, refer to the installation guidelines: https://www.mindspore.cn/install\n", + "Device set to: Ascend (ID: 0)\n", + "✓ JIT level set to O0 for Ascend compatibility\n", + "\n", + "📊 Loading datasets...\n", + "Using legacy dataset format for case '14'\n", + "Loading pre-processed data from ./data/mindspore/processed/case14_split70_15_15_train.h5\n", + "[WARNING] DEVICE(74408,ffffb7cb20e0,python):2025-11-29-20:10:31.190.483 [mindspore/ccsrc/plugin/res_manager/ascend/mem_manager/ascend_vmm_adapter.h:152] CheckVmmDriverVersion] Open file /etc/ascend_install.info failed.\n", + "[WARNING] DEVICE(74408,ffffb7cb20e0,python):2025-11-29-20:10:31.191.402 [mindspore/ccsrc/plugin/res_manager/ascend/mem_manager/ascend_vmm_adapter.h:191] CheckVmmDriverVersion] Driver version is less than 24.0.0, vmm is disabled by default, drvier_version: 23.0.6\n", + "✓ Normalization stats computed from training set\n", + "xymean, xystd assigned.\n", + "edgemean, edgestd assigned.\n", + "Loading pre-processed data from ./data/mindspore/processed/case14_split70_15_15_val.h5\n", + "xymean, xystd assigned.\n", + "edgemean, edgestd assigned.\n", + "Loading pre-processed data from ./data/mindspore/processed/case14_split70_15_15_test.h5\n", + "✓ Train: 70, Val: 15, Test: 15\n", + "\n", + "🧠 Creating MASK_EMBD_MULTI_MPN model...\n", + "✓ Model has 59,528 trainable parameters\n", + "✓ Config saved to logs/14/mask_embed_multi_mpn_20251129-201046/config.json\n", + "\n", + "============================================================\n", + "Training MASK_EMBD_MULTI_MPN on 14\n", + "============================================================\n", + "...Epoch 1 | Train Loss: 1.087661 | Val Loss: 0.991360 | 46.87s\n", + "✓ Model saved to models/14/mask_embed_multi_mpn_20251129-201046/model_epoch_1.ckpt\n", + "Epoch 2 | Train Loss: 0.991444 | Val Loss: 0.977894 | 15.86s\n", + "✓ Model saved to 
models/14/mask_embed_multi_mpn_20251129-201046/model_epoch_2.ckpt\n", + "Epoch 3 | Train Loss: 0.992521 | Val Loss: 0.979066 | 26.91s\n", + "Epoch 4 | Train Loss: 0.985883 | Val Loss: 0.986818 | 16.25s\n", + "Epoch 5 | Train Loss: 1.007351 | Val Loss: 0.976410 | 24.91s\n", + "✓ Model saved to models/14/mask_embed_multi_mpn_20251129-201046/model_epoch_5.ckpt\n", + "Epoch 6 | Train Loss: 0.982748 | Val Loss: 0.974214 | 16.24s\n", + "✓ Model saved to models/14/mask_embed_multi_mpn_20251129-201046/model_epoch_6.ckpt\n", + "Epoch 7 | Train Loss: 0.977369 | Val Loss: 0.974370 | 25.37s\n", + "Epoch 8 | Train Loss: 0.979914 | Val Loss: 0.975002 | 25.65s\n", + "Epoch 9 | Train Loss: 0.981397 | Val Loss: 0.974713 | 26.39s\n", + "Epoch 10 | Train Loss: 0.977533 | Val Loss: 0.974221 | 26.01s\n", + "Epoch 11 | Train Loss: 0.980950 | Val Loss: 0.973269 | 16.64s\n", + "✓ Model saved to models/14/mask_embed_multi_mpn_20251129-201046/model_epoch_11.ckpt\n", + "Epoch 12 | Train Loss: 0.980683 | Val Loss: 0.971855 | 17.43s\n", + "✓ Model saved to models/14/mask_embed_multi_mpn_20251129-201046/model_epoch_12.ckpt\n", + "Epoch 13 | Train Loss: 0.975146 | Val Loss: 0.970862 | 24.44s\n", + "✓ Model saved to models/14/mask_embed_multi_mpn_20251129-201046/model_epoch_13.ckpt\n", + "Epoch 14 | Train Loss: 0.969727 | Val Loss: 0.970393 | 25.41s\n", + "✓ Model saved to models/14/mask_embed_multi_mpn_20251129-201046/model_epoch_14.ckpt\n", + "Epoch 15 | Train Loss: 0.978769 | Val Loss: 0.970223 | 16.00s\n", + "✓ Model saved to models/14/mask_embed_multi_mpn_20251129-201046/model_epoch_15.ckpt\n", + "Epoch 16 | Train Loss: 0.969222 | Val Loss: 0.971157 | 26.00s\n", + "Epoch 17 | Train Loss: 0.977314 | Val Loss: 0.974023 | 25.83s\n", + "Epoch 18 | Train Loss: 0.972875 | Val Loss: 0.977315 | 26.30s\n", + "Epoch 19 | Train Loss: 0.969424 | Val Loss: 0.980232 | 26.76s\n", + "Epoch 20 | Train Loss: 0.967519 | Val Loss: 0.983229 | 26.94s\n", + "\n", + 
"============================================================\n", + "Test Loss: 1.004601\n", + "============================================================\n", + "\n", + "✓ History saved to logs/14/mask_embed_multi_mpn_20251129-201046/history.json\n", + "✓ Plot saved to logs/14/mask_embed_multi_mpn_20251129-201046/training_curve.png\n", + "✅ Training completed!\n" + ] + } + ], + "source": [ + "!python train.py --model mask_embed_multi_mpn --epochs 20 --device Ascend" + ] + }, + { + "cell_type": "markdown", + "id": "82ed30bb", + "metadata": {}, + "source": [ + "### 5. mpn_simplenet 模型训练" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "69425396", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[WARNING] ME(101693:281473626976480,MainProcess):2025-11-29-20:32:22.310.000 [mindspore/run_check/_check_version.py:409] Can not find the tbe operator implementation(need by mindspore-ascend). Please check whether the Environment Variable PYTHONPATH is set. 
For details, refer to the installation guidelines: https://www.mindspore.cn/install\n", + "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.11/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for type is zero.\n", + " setattr(self, word, getattr(machar, word).flat[0])\n", + "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.11/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for type is zero.\n", + " return self._float_to_str(self.smallest_subnormal)\n", + "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.11/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for type is zero.\n", + " setattr(self, word, getattr(machar, word).flat[0])\n", + "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.11/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for type is zero.\n", + " return self._float_to_str(self.smallest_subnormal)\n", + "[WARNING] ME(101693:281473626976480,MainProcess):2025-11-29-20:32:34.300.000 [mindspore/run_check/_check_version.py:409] Can not find the tbe operator implementation(need by mindspore-ascend). Please check whether the Environment Variable PYTHONPATH is set. 
For details, refer to the installation guidelines: https://www.mindspore.cn/install\n", + "Device set to: Ascend (ID: 0)\n", + "✓ JIT level set to O0 for Ascend compatibility\n", + "\n", + "📊 Loading datasets...\n", + "Using legacy dataset format for case '14'\n", + "Loading pre-processed data from ./data/mindspore/processed/case14_split70_15_15_train.h5\n", + "[WARNING] DEVICE(101693,ffffaf8cb0e0,python):2025-11-29-20:32:34.551.062 [mindspore/ccsrc/plugin/res_manager/ascend/mem_manager/ascend_vmm_adapter.h:152] CheckVmmDriverVersion] Open file /etc/ascend_install.info failed.\n", + "[WARNING] DEVICE(101693,ffffaf8cb0e0,python):2025-11-29-20:32:34.551.172 [mindspore/ccsrc/plugin/res_manager/ascend/mem_manager/ascend_vmm_adapter.h:191] CheckVmmDriverVersion] Driver version is less than 24.0.0, vmm is disabled by default, drvier_version: 23.0.6\n", + "✓ Normalization stats computed from training set\n", + "xymean, xystd assigned.\n", + "edgemean, edgestd assigned.\n", + "Loading pre-processed data from ./data/mindspore/processed/case14_split70_15_15_val.h5\n", + "xymean, xystd assigned.\n", + "edgemean, edgestd assigned.\n", + "Loading pre-processed data from ./data/mindspore/processed/case14_split70_15_15_test.h5\n", + "✓ Train: 70, Val: 15, Test: 15\n", + "\n", + "🧠 Creating MPN_SIMPLENET model...\n", + "✓ Model has 38,788 trainable parameters\n", + "✓ Config saved to logs/14/mpn_simplenet_20251129-203250/config.json\n", + "\n", + "============================================================\n", + "Training MPN_SIMPLENET on 14\n", + "============================================================\n", + ".Epoch 1 | Train Loss: 0.992810 | Val Loss: 0.975911 | 24.91s\n", + "✓ Model saved to models/14/mpn_simplenet_20251129-203250/model_epoch_1.ckpt\n", + "Epoch 2 | Train Loss: 0.976788 | Val Loss: 0.981428 | 17.00s\n", + "Epoch 3 | Train Loss: 0.973201 | Val Loss: 0.980770 | 16.33s\n", + "Epoch 4 | Train Loss: 0.970793 | Val Loss: 0.978732 | 17.57s\n", + "Epoch 5 | Train 
Loss: 0.966148 | Val Loss: 0.977172 | 17.45s\n", + "Epoch 6 | Train Loss: 0.961575 | Val Loss: 0.976519 | 16.91s\n", + "Epoch 7 | Train Loss: 0.953210 | Val Loss: 0.976912 | 17.46s\n", + "Epoch 8 | Train Loss: 0.949938 | Val Loss: 0.977088 | 17.64s\n", + "Epoch 9 | Train Loss: 0.952222 | Val Loss: 0.976507 | 17.05s\n", + "Epoch 10 | Train Loss: 0.941590 | Val Loss: 0.975597 | 17.51s\n", + "✓ Model saved to models/14/mpn_simplenet_20251129-203250/model_epoch_10.ckpt\n", + "Epoch 11 | Train Loss: 0.942225 | Val Loss: 0.974691 | 17.04s\n", + "✓ Model saved to models/14/mpn_simplenet_20251129-203250/model_epoch_11.ckpt\n", + "Epoch 12 | Train Loss: 0.937784 | Val Loss: 0.974231 | 16.99s\n", + "✓ Model saved to models/14/mpn_simplenet_20251129-203250/model_epoch_12.ckpt\n", + "Epoch 13 | Train Loss: 0.935609 | Val Loss: 0.974804 | 17.24s\n", + "Epoch 14 | Train Loss: 0.926611 | Val Loss: 0.976221 | 17.22s\n", + "Epoch 15 | Train Loss: 0.922159 | Val Loss: 0.977609 | 16.88s\n", + "Epoch 16 | Train Loss: 0.919112 | Val Loss: 0.978682 | 17.05s\n", + "Epoch 17 | Train Loss: 0.916570 | Val Loss: 0.979899 | 17.31s\n", + "Epoch 18 | Train Loss: 0.914839 | Val Loss: 0.981365 | 17.77s\n", + "Epoch 19 | Train Loss: 0.912460 | Val Loss: 0.983416 | 18.02s\n", + "Epoch 20 | Train Loss: 0.906930 | Val Loss: 0.986395 | 16.89s\n", + "\n", + "============================================================\n", + "Test Loss: 0.999522\n", + "============================================================\n", + "\n", + "✓ History saved to logs/14/mpn_simplenet_20251129-203250/history.json\n", + "✓ Plot saved to logs/14/mpn_simplenet_20251129-203250/training_curve.png\n", + "✅ Training completed!\n" + ] + } + ], + "source": [ + "!python train.py --model mpn_simplenet --epochs 20 --device Ascend" + ] + }, + { + "cell_type": "markdown", + "id": "2717a6da", + "metadata": {}, + "source": [ + "## 6. 
Conclusion\n", + "\n", + "v2 数据集兼容的模型:\n", + "\n", + "- `mlp` - 简单MLP网络\n", + "- `mpn` - 基础消息传递网络\n", + "- `gcn` - 图卷积网络\n", + "- `mask_embed_multi_mpn` - 带掩码嵌入的多层消息传递网络\n", + "- `mpn_simplenet` - 简化版MPN\n", + "\n", + "不兼容的模型 (需要12维特征输入):\n", + "\n", + "- `skip_mpn`, `mask_embed_mpn`, `multi_mpn`, `mask_embed_multi_mpn_nomp`, `multi_conv_net`" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "MindSpore", + "language": "python", + "name": "mindspore" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/MindEnergy/applications/PowerFlowNet/requirements.txt b/MindEnergy/applications/PowerFlowNet/requirements.txt new file mode 100644 index 000000000..1b94cb9c4 --- /dev/null +++ b/MindEnergy/applications/PowerFlowNet/requirements.txt @@ -0,0 +1,8 @@ +# PowerFlowNet MindSpore Requirements +# Core dependencies +mindspore>=2.2.0 +numpy>=1.19.0 +tqdm + +# Optional: for visualization +matplotlib>=3.0.0 \ No newline at end of file diff --git a/MindEnergy/applications/PowerFlowNet/src/__init__.py b/MindEnergy/applications/PowerFlowNet/src/__init__.py index e82fab496..7e62082dd 100644 --- a/MindEnergy/applications/PowerFlowNet/src/__init__.py +++ b/MindEnergy/applications/PowerFlowNet/src/__init__.py @@ -14,12 +14,11 @@ # ============================================================================ """ PowerFlowNet MindSpore Network Implementations -Aligned with original PyTorch/torch_geometric structure """ from .mpn import ( - MPN, SkipMPN, MaskEmbdMPN, - MultiMPN, MaskEmbdMultiMPN, MaskEmbdMultiMPNNoMP, + MPN, SkipMPN, MaskEmbedMPN, + MultiMPN, MaskEmbedMultiMPN, MaskEmbedMultiMPNNoMP, MultiConvNet, MPNSimplenet, WrappedMultiConv ) from .gcn import GCNNet @@ -46,8 +45,8 @@ from .power_flow_data 
import ( ) __all__ = [ - 'MPN', 'SkipMPN', 'MaskEmbdMPN', - 'MultiMPN', 'MaskEmbdMultiMPN', 'MaskEmbdMultiMPNNoMP', + 'MPN', 'SkipMPN', 'MaskEmbedMPN', + 'MultiMPN', 'MaskEmbedMultiMPN', 'MaskEmbedMultiMPNNoMP', 'MultiConvNet', 'MPNSimplenet', 'WrappedMultiConv', 'GCNNet', 'MLPNet', 'MessagePassing', diff --git a/MindEnergy/applications/PowerFlowNet/src/argument_parser.py b/MindEnergy/applications/PowerFlowNet/src/argument_parser.py index 70047ad07..01db63e41 100644 --- a/MindEnergy/applications/PowerFlowNet/src/argument_parser.py +++ b/MindEnergy/applications/PowerFlowNet/src/argument_parser.py @@ -20,7 +20,7 @@ Argument parser for MindSpore PowerFlowNet. This module implements a unified argument parser that supports both JSON configuration -files and command-line arguments. It has been adapted from the original PyTorch version +files and command-line arguments. It has been adapted from the original version to work seamlessly with MindSpore framework while maintaining API compatibility. 
Key features: @@ -85,8 +85,8 @@ def argument_parser(): parser.add_argument('--dropout_rate', type=float, default=0.2, help='Dropout rate') parser.add_argument('--model', type=str, default='MPN', - choices=['MLPNet', 'MPN', 'GCNNet', 'SkipMPN', 'MaskEmbdMPN', - 'MultiMPN', 'MaskEmbdMultiMPN', 'MPNSimplenet', 'MultiConvNet'], + choices=['MLPNet', 'MPN', 'GCNNet', 'SkipMPN', 'MaskEmbedMPN', + 'MultiMPN', 'MaskEmbedMultiMPN', 'MPNSimplenet', 'MultiConvNet'], help='Model architecture') parser.add_argument('--regularize', type=bool, default=True, help='Include regularization in loss function') diff --git a/MindEnergy/applications/PowerFlowNet/src/custom_loss_functions.py b/MindEnergy/applications/PowerFlowNet/src/custom_loss_functions.py index 42ecd0603..4325cdbe9 100644 --- a/MindEnergy/applications/PowerFlowNet/src/custom_loss_functions.py +++ b/MindEnergy/applications/PowerFlowNet/src/custom_loss_functions.py @@ -15,7 +15,7 @@ """ Custom Loss Functions for MindSpore PowerFlowNet -Implements aligned loss functions with PyTorch version: +Implements aligned loss functions with MessagePassing version: - MaskedL2Loss: L2 loss with masking support - PowerMaskedLoss: Unified L1/L2 loss with per-feature breakdown - PowerImbalance: Physics-informed loss for power flow equations @@ -279,7 +279,7 @@ class PowerImbalance(nn.Cell): def aggregate(self, messages: Tensor, edge_index: Tensor, num_nodes: int) -> Tensor: """Aggregate messages to source nodes (sum aggregation) - PyTorch MessagePassing with flow='target_to_source': + MessagePassing with flow='target_to_source': - The edge direction is REVERSED internally - x_i comes from edge_index[0] (original source, reversed target) - x_j comes from edge_index[1] (original target, reversed source) @@ -328,7 +328,7 @@ class PowerImbalance(nn.Cell): num_nodes = x.shape[0] - # With flow='target_to_source', PyTorch reverses the edge direction internally: + # With flow='target_to_source', MessagePassing reverses the edge direction 
internally: # - x_i comes from edge_index[0] (original source) # - x_j comes from edge_index[1] (original target) src_idx = edge_index[0] # i (original source, reversed target) diff --git a/MindEnergy/applications/PowerFlowNet/src/evaluation.py b/MindEnergy/applications/PowerFlowNet/src/evaluation.py index e0827ac47..ead0dc653 100644 --- a/MindEnergy/applications/PowerFlowNet/src/evaluation.py +++ b/MindEnergy/applications/PowerFlowNet/src/evaluation.py @@ -15,7 +15,7 @@ """ MindSpore Evaluation Module -Provides evaluation functions aligned with PyTorch version: +Provides evaluation functions aligned with original PowerFlowNet version: - evaluate_epoch: Simple epoch evaluation with single loss value - evaluate_epoch_v2: Detailed evaluation with loss term breakdowns - load_model: Load trained model from checkpoint diff --git a/MindEnergy/applications/PowerFlowNet/src/gnn_ops.py b/MindEnergy/applications/PowerFlowNet/src/gnn_ops.py index 2a4842ec8..4662a6eac 100644 --- a/MindEnergy/applications/PowerFlowNet/src/gnn_ops.py +++ b/MindEnergy/applications/PowerFlowNet/src/gnn_ops.py @@ -1,406 +1,402 @@ -# Copyright 2025 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -""" -Lightweight MindSpore GNN operations -Minimal implementation of torch_geometric-like functionality for MindSpore -Optimized for CPU and NPU compatibility -""" - -import inspect - -import mindspore as ms -from mindspore import Tensor, ops, nn, mint - - -class MessagePassing(nn.Cell): - """ - Lightweight MessagePassing base class for GNN layers - Simplified version compatible with both CPU and NPU - """ - - def __init__(self, aggr: str = 'add', flow: str = 'src_to_trg'): - super().__init__() - self.aggr = aggr - self.flow = flow - - def aggregate(self, x_i, aggr_index, num_nodes): - """ - Aggregate messages based on aggregation type - - Args: - x_i: Messages to aggregate (num_edges, feature_dim) - aggr_index: Target node indices (num_edges,) - num_nodes: Total number of nodes - - Returns: - Aggregated values (num_nodes, feature_dim) - """ - if self.aggr in ('add', 'sum'): - return self._segment_sum(x_i, aggr_index, num_nodes) - if self.aggr == 'mean': - return self._segment_mean(x_i, aggr_index, num_nodes) - if self.aggr == 'max': - return self._segment_max(x_i, aggr_index, num_nodes) - raise ValueError(f"Unknown aggregation type: {self.aggr}") - - @staticmethod - def _segment_sum(values, indices, num_nodes): - """CPU/NPU compatible segment sum""" - result = mint.zeros((num_nodes, values.shape[-1]), dtype=values.dtype) - - for i in range(num_nodes): - mask = indices == i - if ops.any(mask): - result[i] = ops.sum(values[mask], dim=0) - - return result - - @staticmethod - def _segment_mean(values, indices, num_nodes): - """CPU/NPU compatible segment mean""" - result_sum = MessagePassing._segment_sum(values, indices, num_nodes) - - # Count occurrences - count = mint.zeros((num_nodes,), dtype=ms.float32) - for i in range(num_nodes): - mask = indices == i - count[i] = ops.sum(mask.astype(ms.float32)) - - # Avoid division by zero - count = ops.where(count > 0, count, 
mint.ones_like(count)) - - return result_sum / count.expand_dims(-1) - - @staticmethod - def _segment_max(values, indices, num_nodes): - """CPU/NPU compatible segment max""" - result = mint.full((num_nodes, values.shape[-1]), float('-inf'), dtype=values.dtype) - - for i in range(num_nodes): - mask = indices == i - if ops.any(mask): - result[i] = ops.max(values[mask], axis=0) - - return result - - def propagate(self, x, edge_index, edge_attr=None, **kwargs): - """ - Execute message passing - - Args: - x: Node features (num_nodes, feature_dim) - edge_index: Edge indices (2, num_edges) - edge_attr: Edge attributes (num_edges, edge_dim) - **kwargs: Additional arguments passed to message() - - Returns: - Aggregated messages (num_nodes, feature_dim) - """ - src, dst = edge_index[0], edge_index[1] - num_nodes = x.shape[0] - - # Get source and target node features - x_src = x[src] # (num_edges, feature_dim) - features of source nodes - x_dst = x[dst] # (num_edges, feature_dim) - features of target nodes - - # Prepare message arguments - # torch_geometric naming: x_i is target, x_j is source - message_kwargs = {'x_i': x_dst, 'x_j': x_src} - if edge_attr is not None: - message_kwargs['edge_attr'] = edge_attr - message_kwargs.update(kwargs) - - # Get the signature of the message method to filter arguments - message_sig = inspect.signature(self.message) - message_params = set(message_sig.parameters.keys()) - - # Filter to only pass arguments that the message method accepts - filtered_kwargs = { - k: v for k, v in message_kwargs.items() - if k in message_params - } - - # Compute messages - messages = self.message(**filtered_kwargs) # (num_edges, feature_dim) - - # Aggregate messages - out = self.aggregate(messages, dst, num_nodes) - - # Update (default: identity) - out = self.update(out, x) - - return out - - def message(self, **kwargs): - """Compute messages for message passing. - - This method should be overridden in subclasses to define custom message computation. 
- - Args: - **kwargs: Keyword arguments that may include: - x_i (Tensor): Features of target nodes (num_edges, feature_dim). - x_j (Tensor): Features of source nodes (num_edges, feature_dim). - edge_attr (Tensor): Edge attributes if available (num_edges, edge_dim). - - Returns: - Tensor: Computed messages with shape (num_edges, feature_dim). - """ - return kwargs.get('x', None) - - def update(self, aggr_out, x): - """Update node features. Override in subclass.""" - del x # Unused in base class - return aggr_out - -class TAGConv(MessagePassing): - """ - Topology Adaptive Graph Convolutional Network layer - Equivalent to torch_geometric TAGConv implementation - - Reference: "Topology Adaptive Graph Convolutional Networks" - https://arxiv.org/abs/1710.10370 - """ - - def __init__(self, in_channels: int, out_channels: int, k: int = 3, - bias: bool = True, normalize: bool = True): - super().__init__(aggr='add') - self.in_channels = in_channels - self.out_channels = out_channels - self.k = k - self.normalize = normalize - - # Linear transformations for each hop (matching torch_geometric) - # Note: k+1 linear layers for k-hop aggregation + identity - self.lins = nn.CellList([ - nn.Dense(in_channels, out_channels, has_bias=False) - for _ in range(k + 1) - ]) - - if bias: - self.bias = ms.Parameter(mint.zeros(out_channels, dtype=ms.float32)) - else: - self.bias = None - - def construct(self, x, edge_index, edge_weight=None): - """ - Forward pass - matches torch_geometric TAGConv exactly - - Args: - x: Node features (num_nodes, in_channels) - edge_index: Edge indices (2, num_edges) - edge_weight: Edge weights (num_edges,) optional - - Returns: - Output features (num_nodes, out_channels) - """ - # Step 1: Apply GCN normalization (symmetric: D^-0.5 A D^-0.5) - if self.normalize: - edge_weight = self._gcn_norm(x.shape[0], edge_index, edge_weight) - - # Step 2: Initial linear transformation (K=0 term) - out = self.lins[0](x) - - # Step 3: k-hop propagation with cumulative 
aggregation - x_k = x - for hop in range(1, self.k + 1): - # One-hop propagation using edge_weight - x_k = self._propagate_k(x_k, edge_index, edge_weight) - - # Add transformed aggregation to output - out = out + self.lins[hop](x_k) - - # Step 4: Add bias - if self.bias is not None: - out = out + self.bias - - return out - - def _gcn_norm(self, num_nodes, edge_index, edge_weight): - """ - Apply symmetric normalization: D^-0.5 A D^-0.5 - Using scatter_add equivalent for degree computation - """ - num_edges = edge_index.shape[1] - col = edge_index[1] # destination nodes - - # Initialize edge weight if not provided - if edge_weight is None: - edge_weight = mint.ones(num_edges, dtype=ms.float32) - - # Compute degree: scatter_add(edge_weight, col) - # deg[col[i]] += edge_weight[i] - deg = mint.zeros(num_nodes, dtype=ms.float32) - - # Use scatter_add via matrix multiplication trick - # Create a matrix where column i has all edge weights pointing to node i - for_scatter = ops.zeros((num_edges, num_nodes), dtype=ms.float32) - for i in range(num_edges): - for_scatter[i, col[i]] = edge_weight[i] - deg = ops.sum(for_scatter, dim=0) - - # Compute D^-0.5: handle zero degree nodes like PyTorch - # deg_inv_sqrt[deg == 0] = 0 (not inf) - # Use where to avoid inf: if deg > 0, compute 1/sqrt(deg), else 0 - deg_inv_sqrt = ops.where( - deg > 0, - ops.rsqrt(ops.maximum(deg, ms.Tensor(1e-10, ms.float32))), - ms.Tensor(0.0, ms.float32) - ) - - # Apply normalization: norm[i] = D^-0.5[src[i]] * weight[i] * D^-0.5[dst[i]] - src = edge_index[0] - norm = deg_inv_sqrt[src] * edge_weight * deg_inv_sqrt[col] - - return norm - - def _propagate_k(self, x, edge_index, edge_weight): - """ - Single hop propagation with proper normalization - Equivalent to torch_geometric message passing - """ - src = edge_index[0] # source nodes - dst = edge_index[1] # destination nodes - num_nodes = x.shape[0] - num_edges = src.shape[0] - - # Gather source features - x_src = x[src] # (num_edges, feature_dim) - - # 
Apply normalized edge weight - if edge_weight is not None: - x_src = x_src * edge_weight.expand_dims(-1) # (num_edges, feature_dim) - - # Aggregate using matrix multiplication trick - # Create aggregation matrix: agg[i,j] = 1 if dst[i] == j, 0 otherwise - agg_matrix = ops.zeros((num_edges, num_nodes), dtype=ms.float32) - for i in range(num_edges): - agg_matrix[i, dst[i]] = 1.0 - - # out[j] = sum_i(agg_matrix[i,j] * x_src[i]) - out = ops.matmul(agg_matrix.t(), x_src) - - return out - - -class GCNConv(MessagePassing): - """ - Graph Convolutional Network layer - Uses MessagePassing base class for proper aggregation - """ - - def __init__(self, in_channels: int, out_channels: int, bias: bool = True): - super().__init__(aggr='add') - self.in_channels = in_channels - self.out_channels = out_channels - - self.lin = nn.Dense(in_channels, out_channels, has_bias=bias) - - def message(self, x_j, norm=None, **kwargs): # pylint: disable=arguments-differ - """Compute messages - GCN uses neighbor features scaled by norm""" - if norm is not None: - return x_j * norm.expand_dims(-1) - return x_j - - def construct(self, x, edge_index, edge_weight=None): # pylint: disable=unused-argument - """ - Forward pass - - Args: - x: Node features (num_nodes, in_channels) - edge_index: Edge indices (2, num_edges) - edge_weight: Edge weights (num_edges,) optional, currently unused - - Returns: - Output features (num_nodes, out_channels) - """ - # Transform features first - x = self.lin(x) - - # Compute symmetric normalization - row, col = edge_index[0], edge_index[1] - num_nodes = x.shape[0] - num_edges = edge_index.shape[1] - - # Compute degrees using a simpler method - # Count how many edges point TO each node - deg = mint.zeros((num_nodes,), dtype=ms.float32) - for idx in range(num_edges): - node_idx = int(col[idx].asnumpy()) - deg[node_idx] = deg[node_idx] + 1.0 - - # Add 1 for self-loop (GCN convention) - deg = deg + 1.0 - - # Symmetric normalization: D^(-1/2) - deg_inv_sqrt = ops.pow(deg + 
1e-8, -0.5) - - # Edge weights for normalization - norm = deg_inv_sqrt[row] * deg_inv_sqrt[col] - - # Use propagate for message passing - out = self.propagate(x=x, edge_index=edge_index, norm=norm) - - return out - - -def degree(index: Tensor, num_nodes=None, dtype=None) -> Tensor: - """ - Compute node degrees from edge index - CPU/NPU compatible implementation - - Args: - index: Node indices (num_edges,) - num_nodes: Total number of nodes - dtype: Output dtype - - Returns: - Degree tensor (num_nodes,) - """ - if num_nodes is None: - num_nodes = int(ops.max(index).asnumpy()) + 1 - - if dtype is None: - dtype = ms.float32 - - result = mint.zeros((num_nodes,), dtype=dtype) - - for i in range(num_nodes): - mask = index == i - result[i] = ops.sum(mask.astype(dtype)) - - return result - - -def to_undirected(edge_index: Tensor, num_nodes=None) -> Tensor: - """ - Convert directed graph to undirected - - Args: - edge_index: Edge indices (2, num_edges) - num_nodes: Total number of nodes (unused, kept for API compatibility) - - Returns: - Undirected edge index (2, 2*num_edges) - """ - del num_nodes # Unused, kept for API compatibility - src, dst = edge_index[0], edge_index[1] - - # Create reverse edges - reverse_edges = ops.stack([dst, src], axis=0) - - # Concatenate - undirected = ops.concat([edge_index, reverse_edges], axis=1) - - return undirected +# Copyright 2025 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +""" +Lightweight MindSpore GNN operations +Minimal implementation for MindSpore +Optimized for CPU and NPU compatibility +""" + +import mindspore as ms +from mindspore import Tensor, ops, nn, mint + + +class MessagePassing(nn.Cell): + """ + Lightweight MessagePassing base class for GNN layers + Simplified version compatible with both CPU and NPU + """ + + def __init__(self, aggr: str = 'add', flow: str = 'src_to_trg'): + super().__init__() + self.aggr = aggr + self.flow = flow + + def aggregate(self, x_i, aggr_index, num_nodes): + """ + Aggregate messages based on aggregation type + + Args: + x_i: Messages to aggregate (num_edges, feature_dim) + aggr_index: Target node indices (num_edges,) + num_nodes: Total number of nodes + + Returns: + Aggregated values (num_nodes, feature_dim) + """ + if self.aggr in ('add', 'sum'): + return self._segment_sum(x_i, aggr_index, num_nodes) + if self.aggr == 'mean': + return self._segment_mean(x_i, aggr_index, num_nodes) + if self.aggr == 'max': + return self._segment_max(x_i, aggr_index, num_nodes) + raise ValueError(f"Unknown aggregation type: {self.aggr}") + + @staticmethod + def _segment_sum(values, indices, num_nodes): + """CPU/NPU compatible segment sum using simple accumulation in PYNATIVE_MODE""" + result = mint.zeros((num_nodes, values.shape[-1]), dtype=values.dtype) + + # In PYNATIVE_MODE, simple loops with tensor indexing are allowed and support gradients + num_edges = int(indices.shape[0]) + for i in range(num_edges): + idx = int(indices[i].asnumpy()) + result[idx] = result[idx] + values[i] + + return result + + @staticmethod + def _segment_mean(values, indices, num_nodes): + """CPU/NPU compatible segment mean in PYNATIVE_MODE""" + result_sum = MessagePassing._segment_sum(values, indices, num_nodes) + + # Count occurrences + count = mint.zeros((num_nodes,), dtype=ms.float32) + num_edges = int(indices.shape[0]) + for i in range(num_edges): + 
idx = int(indices[i].asnumpy()) + count[idx] = count[idx] + 1.0 + + # Avoid division by zero + count = ops.where(count > 0, count, mint.ones_like(count)) + + return result_sum / count.expand_dims(-1) + + @staticmethod + def _segment_max(values, indices, num_nodes): + """CPU/NPU compatible segment max - fallback to simple implementation""" + result = mint.full((num_nodes, values.shape[-1]), float('-inf'), dtype=values.dtype) + + # For max, we need to iterate through nodes (cannot easily vectorize) + # But in PYNATIVE_MODE this is acceptable + num_nodes_int = int(num_nodes) + for i in range(num_nodes_int): + mask = indices == i + if ops.any(mask): + result[i] = ops.max(values[mask], axis=0) + + return result + + def propagate(self, x, edge_index, edge_attr=None, **kwargs): + """ + Execute message passing + + Args: + x: Node features (num_nodes, feature_dim) + edge_index: Edge indices (2, num_edges) + edge_attr: Edge attributes (num_edges, edge_dim) + **kwargs: Additional arguments passed to message() + + Returns: + Aggregated messages (num_nodes, feature_dim) + """ + src, dst = edge_index[0], edge_index[1] + num_nodes = x.shape[0] + + # Get source and target node features + x_src = x[src] # (num_edges, feature_dim) - features of source nodes + x_dst = x[dst] # (num_edges, feature_dim) - features of target nodes + + # Prepare message arguments + # geometric naming: x_i is target, x_j is source + message_kwargs = {'x_i': x_dst, 'x_j': x_src} + if edge_attr is not None: + message_kwargs['edge_attr'] = edge_attr + message_kwargs.update(kwargs) + + # Compute messages - pass all available kwargs, the message method will handle what it needs + messages = self.message(**message_kwargs) # (num_edges, feature_dim) + + # Aggregate messages + out = self.aggregate(messages, dst, num_nodes) + + # Update (default: identity) + out = self.update(out, x) + + return out + + def message(self, **kwargs): + """Compute messages for message passing. 
+ + This method should be overridden in subclasses to define custom message computation. + + Args: + **kwargs: Keyword arguments that may include: + x_i (Tensor): Features of target nodes (num_edges, feature_dim). + x_j (Tensor): Features of source nodes (num_edges, feature_dim). + edge_attr (Tensor): Edge attributes if available (num_edges, edge_dim). + + Returns: + Tensor: Computed messages with shape (num_edges, feature_dim). + """ + return kwargs.get('x', None) + + def update(self, aggr_out, x): # pylint: disable=unused-argument + """Update node features. Override in subclass.""" + return aggr_out + +class TAGConv(MessagePassing): + """ + Topology Adaptive Graph Convolutional Network layer + Equivalent to TAGConv implementation + + Reference: "Topology Adaptive Graph Convolutional Networks" + https://arxiv.org/abs/1710.10370 + """ + + def __init__(self, in_channels: int, out_channels: int, k: int = 3, + bias: bool = True, normalize: bool = True): + super().__init__(aggr='add') + self.in_channels = in_channels + self.out_channels = out_channels + self.k = k + self.normalize = normalize + + # Linear transformations for each hop + # Note: k+1 linear layers for k-hop aggregation + identity + self.lins = nn.CellList([ + nn.Dense(in_channels, out_channels, has_bias=False) + for _ in range(k + 1) + ]) + + if bias: + self.bias = ms.Parameter(mint.zeros(out_channels, dtype=ms.float32)) + else: + self.bias = None + + def construct(self, x, edge_index, edge_weight=None): + """ + Forward pass - matches TAGConv exactly + + Args: + x: Node features (num_nodes, in_channels) + edge_index: Edge indices (2, num_edges) + edge_weight: Edge weights (num_edges,) optional + + Returns: + Output features (num_nodes, out_channels) + """ + # Step 1: Apply GCN normalization (symmetric: D^-0.5 A D^-0.5) + if self.normalize: + edge_weight = self._gcn_norm(x.shape[0], edge_index, edge_weight) + + # Step 2: Initial linear transformation (K=0 term) + out = self.lins[0](x) + + # Step 3: k-hop 
propagation with cumulative aggregation + x_k = x + for hop in range(1, self.k + 1): + # One-hop propagation using edge_weight + x_k = self._propagate_k(x_k, edge_index, edge_weight) + + # Add transformed aggregation to output + out = out + self.lins[hop](x_k) + + # Step 4: Add bias + if self.bias is not None: + out = out + self.bias + + return out + + def _gcn_norm(self, num_nodes, edge_index, edge_weight): + """ + Apply symmetric normalization: D^-0.5 A D^-0.5 + Using scatter_add equivalent for degree computation + """ + num_edges = edge_index.shape[1] + col = edge_index[1] # destination nodes + + # Initialize edge weight if not provided + if edge_weight is None: + edge_weight = mint.ones(num_edges, dtype=ms.float32) + + # Compute degree: scatter_add(edge_weight, col) + # deg[col[i]] += edge_weight[i] + deg = mint.zeros(num_nodes, dtype=ms.float32) + + # Use scatter_add via matrix multiplication trick + # Create a matrix where column i has all edge weights pointing to node i + for_scatter = ops.zeros((num_edges, num_nodes), dtype=ms.float32) + for i in range(num_edges): + for_scatter[i, col[i]] = edge_weight[i] + deg = ops.sum(for_scatter, dim=0) + + # Compute D^-0.5: handle zero degree nodes + # deg_inv_sqrt[deg == 0] = 0 (not inf) + # Use where to avoid inf: if deg > 0, compute 1/sqrt(deg), else 0 + deg_inv_sqrt = ops.where( + deg > 0, + ops.rsqrt(ops.maximum(deg, ms.Tensor(1e-10, ms.float32))), + ms.Tensor(0.0, ms.float32) + ) + + # Apply normalization: norm[i] = D^-0.5[src[i]] * weight[i] * D^-0.5[dst[i]] + src = edge_index[0] + norm = deg_inv_sqrt[src] * edge_weight * deg_inv_sqrt[col] + + return norm + + def _propagate_k(self, x, edge_index, edge_weight): + """ + Single hop propagation with proper normalization + Equivalent to message passing + """ + src = edge_index[0] # source nodes + dst = edge_index[1] # destination nodes + num_nodes = x.shape[0] + num_edges = src.shape[0] + + # Gather source features + x_src = x[src] # (num_edges, feature_dim) + + # 
Apply normalized edge weight + if edge_weight is not None: + x_src = x_src * edge_weight.expand_dims(-1) # (num_edges, feature_dim) + + # Aggregate using matrix multiplication trick + # Create aggregation matrix: agg[i,j] = 1 if dst[i] == j, 0 otherwise + agg_matrix = ops.zeros((num_edges, num_nodes), dtype=ms.float32) + for i in range(num_edges): + agg_matrix[i, dst[i]] = 1.0 + + # out[j] = sum_i(agg_matrix[i,j] * x_src[i]) + out = ops.matmul(agg_matrix.t(), x_src) + + return out + + +class GCNConv(nn.Cell): + """ + Graph Convolutional Network layer - Simplified implementation + Uses dense matrix multiplication for aggregation, compatible with MindSpore CPU/NPU + """ + + def __init__(self, in_channels: int, out_channels: int, bias: bool = True): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + + self.lin = nn.Dense(in_channels, out_channels, has_bias=bias) + + def construct(self, x, edge_index, edge_weight=None): # pylint: disable=unused-argument + """ + Forward pass with GCN normalization + + Args: + x: Node features (num_nodes, in_channels) + edge_index: Edge indices (2, num_edges) + edge_weight: Edge weights (num_edges,) optional + + Returns: + Output features (num_nodes, out_channels) + """ + # Transform features first + x = self.lin(x) + + # Compute symmetric normalization + row, col = edge_index[0], edge_index[1] + num_nodes = x.shape[0] + num_edges = int(edge_index.shape[1]) + + # Compute degrees using simple loop (PYNATIVE_MODE compatible) + deg = mint.zeros((num_nodes,), dtype=ms.float32) + for i in range(num_edges): + idx = int(col[i].asnumpy()) + deg[idx] = deg[idx] + 1.0 + + # Add 1 for self-loop (GCN convention) + deg = deg + 1.0 + + # Symmetric normalization: D^(-1/2) + deg_inv_sqrt = ops.pow(deg + 1e-8, -0.5) + + # Edge normalization weights + norm = deg_inv_sqrt[row] * deg_inv_sqrt[col] + + # Build adjacency matrix using simple loop aggregation + a_norm = mint.zeros((num_nodes, num_nodes), dtype=ms.float32) 
+ for i in range(num_edges): + src_idx = int(row[i].asnumpy()) + dst_idx = int(col[i].asnumpy()) + a_norm[dst_idx, src_idx] = a_norm[dst_idx, src_idx] + norm[i] + + # Apply GCN aggregation: out = a_norm @ x + out = ops.matmul(a_norm, x) + + return out + + +def degree(index: Tensor, num_nodes=None, dtype=None) -> Tensor: + """ + Compute node degrees from edge index using bincount-like operation + Vectorized implementation for MindSpore + + Args: + index: Node indices (num_edges,) + num_nodes: Total number of nodes + dtype: Output dtype + + Returns: + Degree tensor (num_nodes,) + """ + if num_nodes is None: + num_nodes = int(ops.max(index).asnumpy()) + 1 + + if dtype is None: + dtype = ms.float32 + + # Create result tensor + result = mint.zeros((num_nodes,), dtype=dtype) + + # Use scatter_nd to accumulate degrees + # First, flatten index and create 2D indices for scatter_nd + indices = ops.reshape(index.astype(ms.int32), (-1, 1)) + updates = mint.ones((index.shape[0],), dtype=dtype) + + # Scatter add + result = ops.scatter_nd_add(result, indices, updates) + + return result + + +def to_undirected(edge_index: Tensor, num_nodes=None) -> Tensor: # pylint: disable=unused-argument + """ + Convert directed graph to undirected + + Args: + edge_index: Edge indices (2, num_edges) + num_nodes: Total number of nodes (unused, kept for API compatibility) + + Returns: + Undirected edge index (2, 2*num_edges) + """ + src, dst = edge_index[0], edge_index[1] + + # Create reverse edges + reverse_edges = ops.stack([dst, src], axis=0) + + # Concatenate + undirected = ops.concat([edge_index, reverse_edges], axis=1) + + return undirected diff --git a/MindEnergy/applications/PowerFlowNet/src/mpn.py b/MindEnergy/applications/PowerFlowNet/src/mpn.py index f0146677e..e79cfcbfe 100644 --- a/MindEnergy/applications/PowerFlowNet/src/mpn.py +++ b/MindEnergy/applications/PowerFlowNet/src/mpn.py @@ -15,7 +15,7 @@ # This file is a derivative work based on the original PowerFlowNet implementation # 
(https://github.com/stavrosorf/poweflownet) which was licensed under the MIT License. # Significant modifications have been made to adapt the code for the MindSpore framework, -# including replacement of PyTorch operations with MindSpore equivalents and +# including MindSpore equivalents and # optimization for Ascend hardware acceleration. # ============================================================================ """ @@ -23,26 +23,25 @@ PowerFlowNet Message Passing Network (MPN) - MindSpore Implementation. This module implements a comprehensive family of Message Passing Neural Networks for power flow prediction tasks. The implementation has been adapted from the -original PyTorch version to leverage MindSpore's tensor operations and device +original version to leverage MindSpore's tensor operations and device optimization capabilities, particularly for Ascend hardware acceleration. Architecture Overview: - Base MPN: Topology-aware aggregation using TAGConv with k-hop neighborhoods - SkipMPN: Enhanced with residual skip connections for improved gradient flow -- MaskEmbdMPN: Masked embedding mechanism for selective feature processing +- MaskEmbedMPN: Masked embedding mechanism for selective feature processing - MultiMPN: Multi-head message passing for diverse feature interactions - Advanced variants: Combinations of above features with architectural improvements Key Modifications for MindSpore: -1. TAGConv replaced torch_geometric.nn.TAGConv with custom MindSpore implementation +1. TAGConv replaced TAGConv with custom MindSpore implementation 2. MessagePassing base class adapted for MindSpore tensor operations 3. Device-specific operations (gather, scatter, where) optimized for Ascend 4. 
Batch processing adapted to MindSpore DataLoader API Compatibility: -- MindSpore 2.0+ +- MindSpore 2.7 - CPU and Ascend device support -- Numerical parity with PyTorch version verified """ import numpy as np @@ -99,8 +98,6 @@ class BaseMPN(nn.Cell): class EdgeAggregation(MessagePassing): """MessagePassing for aggregating edge features. - - Equivalent to torch_geometric EdgeAggregation with 'add' aggregation. """ def __init__(self, nfeature_dim, efeature_dim, hidden_dim, output_dim): super().__init__(aggr='add') @@ -108,7 +105,7 @@ class EdgeAggregation(MessagePassing): self.efeature_dim = efeature_dim self.output_dim = output_dim - # MLP for edge aggregation - matches torch version structure + # MLP for edge aggregation self.edge_aggr = nn.SequentialCell([ nn.Dense(nfeature_dim*2 + efeature_dim, hidden_dim), nn.ReLU(), @@ -178,7 +175,7 @@ class MPN(BaseMPN): - One-time Message Passing to aggregate edge features into node features - Multiple TAGConv layers - Equivalent to torch_geometric version. + Equivalent to message passing version. """ def __init__(self, nfeature_dim, efeature_dim, output_dim, hidden_dim, n_gnn_layers, k, dropout_rate): @@ -252,7 +249,7 @@ class SkipMPN(BaseMPN): - One-time Message Passing to aggregate edge features - Multiple TAGConv layers - Equivalent to torch_geometric SkipMPN version. + Equivalent to original SkipMPN version. """ def __init__(self, nfeature_dim, efeature_dim, output_dim, hidden_dim, n_gnn_layers, k, dropout_rate): @@ -309,7 +306,7 @@ class SkipMPN(BaseMPN): return x -class MaskEmbdMPN(BaseMPN): +class MaskEmbedMPN(BaseMPN): """Wrapped Message Passing Network with Mask Embedding. Architecture: @@ -317,7 +314,7 @@ class MaskEmbdMPN(BaseMPN): - One-time Message Passing to aggregate edge features - Multiple TAGConv layers - Equivalent to torch_geometric MaskEmbdMPN version. + Equivalent to original MaskEmbedMPN version. 
""" def __init__(self, nfeature_dim, efeature_dim, output_dim, hidden_dim, n_gnn_layers, k, dropout_rate): @@ -331,11 +328,9 @@ class MaskEmbdMPN(BaseMPN): self.dropout_rate = dropout_rate # Embedding layer for mask: nfeature_dim -> hidden_dim -> nfeature_dim - # PyTorch: nn.Sequential(nn.Linear(nfeature_dim, hidden_dim), nn.ReLU(), - # nn.Linear(hidden_dim, nfeature_dim)) - self.mask_embd_fc1 = nn.Dense(nfeature_dim, hidden_dim) - self.mask_embd_fc2 = nn.Dense(hidden_dim, nfeature_dim) - self.mask_embd_relu = nn.ReLU() + self.mask_embed_fc1 = nn.Dense(nfeature_dim, hidden_dim) + self.mask_embed_fc2 = nn.Dense(hidden_dim, nfeature_dim) + self.mask_embed_relu = nn.ReLU() self.edge_aggr = EdgeAggregation(nfeature_dim, efeature_dim, hidden_dim, hidden_dim) @@ -356,8 +351,8 @@ class MaskEmbdMPN(BaseMPN): Expects 12D input: [one-hot bus_type(4) + features(4) + mask(4)] """ assert data.x.shape[-1] == self.nfeature_dim * 2 + 4, ( - f"MaskEmbdMPN expects 12D input [one-hot(4) + features({self.nfeature_dim}) + mask({self.nfeature_dim})], " - f"got {data.x.shape[-1]}D. Use mpn, gcn, mask_embd_multi_mpn, or mlp for 4D data." + f"MaskEmbedMPN expects 12D input [one-hot(4) + features({self.nfeature_dim}) + mask({self.nfeature_dim})], " + f"got {data.x.shape[-1]}D. Use mpn, gcn, mask_embed_multi_mpn, or mlp for 4D data." 
) x = data.x[:, 4:4+self.nfeature_dim] # Extract features from 12D input mask = data.x[:, -self.nfeature_dim:] # Extract mask from 12D input @@ -365,10 +360,10 @@ class MaskEmbdMPN(BaseMPN): edge_features = data.edge_attr # Embed mask (nfeature_dim -> hidden_dim -> nfeature_dim) and add to features - mask_embd = self.mask_embd_fc1(mask) - mask_embd = self.mask_embd_relu(mask_embd) - mask_embd = self.mask_embd_fc2(mask_embd) - x = mask_embd + x + mask_embed = self.mask_embed_fc1(mask) + mask_embed = self.mask_embed_relu(mask_embed) + mask_embed = self.mask_embed_fc2(mask_embed) + x = mask_embed + x edge_index, edge_features = self.undirected_graph(edge_index, edge_features) @@ -392,7 +387,7 @@ class MultiMPN(BaseMPN): - Multi-step EdgeAggregation + TAGConv layers - No final convolution layer, ends with EdgeAggregation - Equivalent to torch_geometric MultiMPN version. + Equivalent to original MultiMPN version. """ def __init__(self, nfeature_dim, efeature_dim, output_dim, hidden_dim, n_gnn_layers, k, dropout_rate): @@ -454,7 +449,7 @@ class MultiMPN(BaseMPN): return x -class MaskEmbdMultiMPN(BaseMPN): +class MaskEmbedMultiMPN(BaseMPN): """Wrapped Message Passing Network with Mask Embedding + Multi-step MP+Conv. Architecture: @@ -462,7 +457,7 @@ class MaskEmbdMultiMPN(BaseMPN): - Multi-step EdgeAggregation + TAGConv layers - No final convolution layer, ends with EdgeAggregation - Equivalent to torch_geometric MaskEmbdMultiMPN version. + Equivalent to original MaskEmbedMultiMPN version. """ def __init__(self, nfeature_dim, efeature_dim, output_dim, hidden_dim, n_gnn_layers, k, dropout_rate): @@ -530,14 +525,14 @@ class MaskEmbdMultiMPN(BaseMPN): return x -class MaskEmbdMultiMPNNoMP(BaseMPN): +class MaskEmbedMultiMPNNoMP(BaseMPN): """Wrapped Message Passing Network with Mask Embedding, Multi-step MP+Conv, No MP. 
Architecture: - Mask embedding layer - Multi-step TAGConv layers (no EdgeAggregation except at end) - Equivalent to torch_geometric MaskEmbdMultiMPN_NoMP version. + Equivalent to original MaskEmbedMultiMPN_NoMP version. """ def __init__(self, nfeature_dim, efeature_dim, output_dim, hidden_dim, n_gnn_layers, k, dropout_rate): @@ -576,9 +571,9 @@ class MaskEmbdMultiMPNNoMP(BaseMPN): Expects 12D input: [one-hot bus_type(4) + features(4) + mask(4)] """ assert data.x.shape[-1] == self.nfeature_dim * 2 + 4, ( - f"MaskEmbdMultiMPNNoMP expects 12D input " + f"MaskEmbedMultiMPNNoMP expects 12D input " f"[one-hot(4) + features({self.nfeature_dim}) + mask({self.nfeature_dim})], " - f"got {data.x.shape[-1]}D. Use mpn, gcn, mask_embd_multi_mpn, or mlp for 4D data." + f"got {data.x.shape[-1]}D. Use mpn, gcn, mask_embed_multi_mpn, or mlp for 4D data." ) x = data.x[:, 4:4+self.nfeature_dim] # Extract features from 12D input mask = data.x[:, -self.nfeature_dim:] # Extract mask from 12D input (last nfeature_dim columns) @@ -656,7 +651,7 @@ class MultiConvNet(BaseMPN): - No message passing to aggregate edge features - Multi-level parallel Conv layers for different edge features - Equivalent to torch_geometric MultiConvNet version. + Equivalent to original MultiConvNet version. """ def __init__(self, nfeature_dim, efeature_dim, output_dim, hidden_dim, n_gnn_layers, k, dropout_rate): @@ -734,7 +729,7 @@ class MPNSimplenet(BaseMPN): - One-time Message Passing to aggregate edge features into node features - Multiple Conv layers - Equivalent to torch_geometric MPNSimplenet version. + Equivalent to original MPNSimplenet version. 
""" def __init__(self, nfeature_dim, efeature_dim, output_dim, hidden_dim, n_gnn_layers, k, dropout_rate): diff --git a/MindEnergy/applications/PowerFlowNet/src/power_flow_data.py b/MindEnergy/applications/PowerFlowNet/src/power_flow_data.py index 161fa43b8..5d4ef58a2 100644 --- a/MindEnergy/applications/PowerFlowNet/src/power_flow_data.py +++ b/MindEnergy/applications/PowerFlowNet/src/power_flow_data.py @@ -21,7 +21,7 @@ """PowerFlow Data Processing Module for MindSpore. This module provides comprehensive data loading and processing utilities for PowerFlow -networks, implementing a complete data pipeline adapted from the original PyTorch version +networks, implementing a complete data pipeline adapted from the original version to leverage MindSpore's tensor operations and device optimization capabilities. Key Components: @@ -50,7 +50,7 @@ Key Components: - Graph collation with proper edge index offset handling 5. MindSpore Adaptations: - - Replaced PyTorch DataLoader with MindSpore-native implementation + - MindSpore-native implementation - Tensor operations using MindSpore ops and mint modules - Device-agnostic design (CPU/Ascend compatible) - Efficient memory management with numpy-based preprocessing @@ -107,7 +107,7 @@ class PowerFlowData(InMemoryDataset): """PowerFlow dataset for graph neural network training - MindSpore Implementation. A comprehensive dataset class implementing the complete data pipeline for power flow - prediction tasks. Adapted from PyTorch version with major enhancements for MindSpore. + prediction tasks. Adapted from original version with major enhancements for MindSpore. 
Features: --------- @@ -132,8 +132,8 @@ class PowerFlowData(InMemoryDataset): MindSpore Enhancements: ---------------------- - - Replaced torch_geometric.data.InMemoryDataset with custom implementation - - MindSpore Tensor instead of PyTorch tensors + - Custom implementation InMemoryDataset + - MindSpore Tensor usage throughout data pipeline - MindSpore ops for normalization (mean, std, cat) - Device-agnostic design (automatic CPU/Ascend compatibility) - Efficient edge offset calculation in batch collation @@ -404,11 +404,11 @@ class PowerFlowData(InMemoryDataset): def _setup_paths(self): """Setup raw and processed paths""" - # V2 datasets use mindspore/raw, legacy datasets use torch/raw + # V2 datasets use mindspore/raw, legacy datasets use mindspore/raw if self.case.endswith('v2'): self.raw_dir = os.path.join(self.root, 'mindspore', 'raw') else: - self.raw_dir = os.path.join(self.root, 'torch', 'raw') + self.raw_dir = os.path.join(self.root, 'mindspore', 'raw') self.processed_dir = os.path.join(self.root, 'mindspore', 'processed') os.makedirs(self.raw_dir, exist_ok=True) os.makedirs(self.processed_dir, exist_ok=True) @@ -619,7 +619,7 @@ class PowerFlowDataLoader: A fully MindSpore-native data loader providing efficient batching and graph collation for power flow datasets. This is a custom implementation adapted - from PyTorch's DataLoader with MindSpore-specific optimizations. + from original DataLoader with MindSpore-specific optimizations. Key Features: ============= diff --git a/MindEnergy/applications/PowerFlowNet/src/training.py b/MindEnergy/applications/PowerFlowNet/src/training.py index da3359c8d..15f441634 100644 --- a/MindEnergy/applications/PowerFlowNet/src/training.py +++ b/MindEnergy/applications/PowerFlowNet/src/training.py @@ -15,7 +15,7 @@ """ Training utilities for MindSpore PowerFlowNet. 
-This module provides training functions aligned with PyTorch training utilities: +This module provides training functions aligned with original training utilities: - train_epoch: Train model for one epoch with gradient updates - append_to_json: Append training results to JSON log file diff --git a/MindEnergy/applications/PowerFlowNet/train.py b/MindEnergy/applications/PowerFlowNet/train.py new file mode 100644 index 000000000..84894f5e3 --- /dev/null +++ b/MindEnergy/applications/PowerFlowNet/train.py @@ -0,0 +1,513 @@ +#!/usr/bin/env python3 + +# Copyright 2025 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# This file is a derivative work based on the original PowerFlowNet implementation +# (https://github.com/stavrosorf/poweflownet) which was licensed under the MIT License. +# Significant modifications have been made to adapt the code for the MindSpore framework, +# including MindSpore equivalents and +# optimization for Ascend hardware acceleration. +# ============================================================================ +""" +Training script for MindSpore PowerFlowNet +Supports both v2 dataset (4D features) and legacy dataset (12D features) +- v2 dataset: Compatible with mlp, mpn, gcn, mask_embed_multi_mpn, mpn_simplenet +- legacy dataset: Additionally supports skip_mpn, mask_embed_mpn, multi_mpn, etc. 
Includes logging, weight saving, and visualization
"""
import argparse
import json
import os
import sys
import time
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import mindspore as ms
from mindspore import nn

from configs.config import init_device
from src import (
    MLPNet, MPN, GCNNet, SkipMPN, MaskEmbedMPN, MultiMPN,
    MaskEmbedMultiMPN, MaskEmbedMultiMPNNoMP, MultiConvNet, MPNSimplenet
)
from src.custom_loss_functions import MaskedL2Loss, PowerImbalance, MixedMSEPowerImbalance
from src.power_flow_data import PowerFlowDataV2, PowerFlowDataLoaderV2

# Unset RANK_TABLE_FILE to avoid Ascend distributed training mode
# which forces JIT level O2 and causes optimizer compilation issues
if 'RANK_TABLE_FILE' in os.environ:
    del os.environ['RANK_TABLE_FILE']

# NOTE(review): this insert runs AFTER the configs/src imports above, so it
# cannot affect them; confirm whether it is still needed for later dynamic
# imports or should be moved before the first project import.
sys.path.insert(0, str(Path(__file__).parent))

# Check if running on Ascend - use PYNATIVE mode for compatibility
def is_ascend():
    """Return True if the active MindSpore device target is Ascend.

    Any failure to query the context is treated as "not Ascend".
    NOTE(review): not referenced elsewhere in this script — confirm it is
    kept intentionally as a public helper.
    """
    try:
        device_target = ms.get_context("device_target")
        return device_target == "Ascend"
    except Exception:
        return False


class MaskedMSELoss(nn.Cell):
    """Masked MSE loss - for backward compatibility and simple loss.

    Element-wise MSE multiplied by `mask`, then averaged over ALL elements —
    masked-out entries still count in the denominator (unlike a
    mask-normalized mean).
    """

    def __init__(self):
        super().__init__()
        # reduction='none' keeps per-element losses so the mask can be applied
        self.mse = nn.MSELoss(reduction='none')

    def construct(self, pred, target, mask):
        """Compute mean of mask-weighted squared errors."""
        loss = self.mse(pred, target)
        return (loss * mask).mean()


class Trainer:
    """Training manager with logging and checkpointing."""

    def __init__(self, model, train_loader, val_loader, test_loader,
                 args, log_dir='logs', model_dir='models',
                 norm_stats=None):
        """Wire up model, data loaders, optimizer, loss, and output dirs.

        Args:
            model: MindSpore Cell to train.
            train_loader/val_loader/test_loader: batch iterators yielding
                graph Batch objects.
            args: parsed CLI namespace (case, model, epochs, lr, patience, ...).
            log_dir/model_dir: root directories for logs and checkpoints.
            norm_stats: optional (xymean, xystd, edgemean, edgestd) tuple,
                required by the physics-informed losses.
        """
        self.model = model
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.args = args
        self.norm_stats = norm_stats  # (xymean, xystd, edgemean, edgestd)

        # Setup directories — each run gets a unique timestamped subdir
        self.run_id = datetime.now().strftime("%Y%m%d-%H%M%S")
        self.log_dir = Path(log_dir) / args.case / f"{args.model}_{self.run_id}"
        self.model_dir = Path(model_dir) / args.case / f"{args.model}_{self.run_id}"
        self.log_dir.mkdir(parents=True, exist_ok=True)
        self.model_dir.mkdir(parents=True, exist_ok=True)

        # Setup optimizer
        self.optimizer = nn.Adam(model.trainable_params(), learning_rate=args.lr)

        # Create loss function based on args
        self.setup_loss_function()

        # History for logging
        self.history = {
            'train_loss': [],
            'val_loss': [],
            'test_loss': None,
            'epochs': [],
        }

        # Early stopping
        self.best_val_loss = float('inf')
        self.patience = args.patience
        self.patience_counter = 0

        # Save config
        self.save_config()

    def setup_loss_function(self):
        """Select training and evaluation losses from args.train_loss_fn.

        Evaluation always uses a masked regression loss (MaskedL2Loss /
        MaskedMSELoss) so validation numbers are comparable across runs,
        even when training uses a physics-informed loss.
        """
        if hasattr(self.args, 'train_loss_fn'):
            loss_fn_type = self.args.train_loss_fn.lower()
        else:
            loss_fn_type = 'masked_mse'  # default

        if loss_fn_type in ('masked_l2', 'masked_mse'):
            self.loss_fn = MaskedL2Loss()
            self.eval_loss_fn = MaskedL2Loss()
        elif loss_fn_type == 'power_imbalance':
            # Physics-informed power imbalance loss (needs de-normalization stats)
            if self.norm_stats is not None:
                xymean, xystd, edgemean, edgestd = self.norm_stats
                self.loss_fn = PowerImbalance(xymean, xystd, edgemean, edgestd)
            else:
                print("⚠ Warning: norm_stats not provided, falling back to MaskedL2Loss")
                self.loss_fn = MaskedL2Loss()
            self.eval_loss_fn = MaskedL2Loss()
        elif loss_fn_type == 'mixed_mse_power_imbalance':
            # Mixed MSE + Power Imbalance loss (alpha weights the MSE term)
            if self.norm_stats is not None:
                xymean, xystd, edgemean, edgestd = self.norm_stats
                self.loss_fn = MixedMSEPowerImbalance(xymean, xystd, edgemean, edgestd, alpha=0.9)
            else:
                print("⚠ Warning: norm_stats not provided, falling back to MaskedL2Loss")
                self.loss_fn = MaskedL2Loss()
            self.eval_loss_fn = MaskedL2Loss()
        else:
            # Default simple MSE loss
            self.loss_fn = MaskedMSELoss()
            self.eval_loss_fn = MaskedMSELoss()

    def save_config(self):
        """Save training configuration"""
        config = 
{
            'case': self.args.case,
            'model': self.args.model,
            'epochs': self.args.epochs,
            'batch_size': self.args.batch_size,
            'lr': self.args.lr,
            'hidden_dim': self.args.hidden_dim,
            'device': self.args.device,
            'run_id': self.run_id,
        }
        config_path = self.log_dir / 'config.json'
        with open(config_path, 'w', encoding='utf-8') as f:
            json.dump(config, f, indent=2)
        print(f"✓ Config saved to {config_path}")

    def train_epoch(self):
        """Train model for one epoch; return mean training loss.

        A fresh forward/grad closure is built on every call (cheap in
        PYNATIVE mode); the closure shape depends on which training loss
        is configured, since the physics losses take edge tensors instead
        of (target, mask).
        """
        self.model.set_train(True)
        total_loss = 0.0
        num_batches = 0

        # Define gradient function based on loss type
        loss_fn_type = getattr(self.args, 'train_loss_fn', 'masked_l2').lower()

        if loss_fn_type == 'power_imbalance':
            def forward_fn(batch):
                pred = self.model(batch)
                # Keep known (unmasked) quantities from the input so the
                # imbalance is evaluated on a physically complete state.
                masked_pred = pred * batch.pred_mask + batch.x * (1 - batch.pred_mask)
                loss = self.loss_fn(masked_pred, batch.edge_index, batch.edge_attr)
                return loss
        elif loss_fn_type == 'mixed_mse_power_imbalance':
            def forward_fn(batch):
                pred = self.model(batch)
                loss = self.loss_fn(pred, batch.edge_index, batch.edge_attr, batch.y)
                return loss
        else:
            def forward_fn(batch):
                pred = self.model(batch)
                loss = self.loss_fn(pred, batch.y, batch.pred_mask)
                return loss

        # value_and_grad returns (loss, grads) w.r.t. the trainable params
        grad_fn = ms.value_and_grad(forward_fn, None, self.model.trainable_params())

        for batch in self.train_loader:
            # Compute loss and gradients
            loss, grads = grad_fn(batch)

            # Update parameters
            self.optimizer(grads)

            total_loss += float(loss.asnumpy())
            num_batches += 1
        return total_loss / max(1, num_batches)

    def validate(self):
        """Evaluate on the validation set; return mean eval loss.

        Always uses eval_loss_fn (masked regression loss) regardless of
        the training loss, so runs are comparable.
        """
        self.model.set_train(False)
        total_loss = 0.0
        num_batches = 0

        for batch in self.val_loader:
            # batch is already a Batch object from DataLoader
            pred = self.model(batch)
            loss = self.eval_loss_fn(pred, batch.y, batch.pred_mask)
            total_loss += float(loss.asnumpy())
            num_batches += 1

        return total_loss / max(1, num_batches)

    def test(self):
        """Evaluate on the test set; return mean eval loss."""
        self.model.set_train(False)
        total_loss = 0.0
        num_batches = 0

        for batch in self.test_loader:
            # batch is already a Batch object from DataLoader
            pred = self.model(batch)
            loss = self.eval_loss_fn(pred, batch.y, batch.pred_mask)
            total_loss += float(loss.asnumpy())
            num_batches += 1

        return total_loss / max(1, num_batches)

    def save_model(self, epoch):
        """Save a model checkpoint tagged with the epoch number."""
        model_path = self.model_dir / f'model_epoch_{epoch}.ckpt'
        ms.save_checkpoint(self.model, str(model_path))
        print(f"✓ Model saved to {model_path}")

    def train(self):
        """Main training loop: train/validate per epoch, early-stop, then test.

        Checkpoints are only written when validation loss improves, so the
        newest checkpoint on disk is always the best-so-far model.
        """
        print(f"\n{'='*60}")
        print(f"Training {self.args.model.upper()} on {self.args.case}")
        print(f"{'='*60}")

        for epoch in range(1, self.args.epochs + 1):
            start_time = time.time()

            # Train
            train_loss = self.train_epoch()

            # Validate
            val_loss = self.validate()

            # Log
            self.history['train_loss'].append(train_loss)
            self.history['val_loss'].append(val_loss)
            self.history['epochs'].append(epoch)

            elapsed = time.time() - start_time
            print(f"Epoch {epoch:3d} | Train Loss: {train_loss:.6f} | Val Loss: {val_loss:.6f} | {elapsed:.2f}s")

            # Early stopping check
            if val_loss < self.best_val_loss:
                self.best_val_loss = val_loss
                self.patience_counter = 0
                self.save_model(epoch)
            else:
                self.patience_counter += 1
                # patience=0 means disabled
                if self.patience > 0 and self.patience_counter >= self.patience:
                    print(f"\n✓ Early stopping at epoch {epoch}")
                    break

        # Test
        print(f"\n{'='*60}")
        test_loss = self.test()
        self.history['test_loss'] = test_loss
        print(f"Test Loss: {test_loss:.6f}")
        print(f"{'='*60}\n")

        # Save history
        self.save_history()
        self.plot_training()

    def save_history(self):
        """Save training history"""
        history_path = self.log_dir / 'history.json'
        history = {
            'train_loss': [float(x) for x in self.history['train_loss']],
            'val_loss': [float(x) for x in self.history['val_loss']],
            'test_loss': 
float(self.history['test_loss']) if self.history['test_loss'] else None, + 'epochs': self.history['epochs'], + } + with open(history_path, 'w', encoding='utf-8') as f: + json.dump(history, f, indent=2) + print(f"✓ History saved to {history_path}") + + def plot_training(self): + """Plot training curves""" + plt.figure(figsize=(10, 5)) + plt.plot(self.history['epochs'], self.history['train_loss'], 'b-', label='Train Loss') + plt.plot(self.history['epochs'], self.history['val_loss'], 'r-', label='Val Loss') + plt.xlabel('Epoch') + plt.ylabel('Loss') + plt.legend() + plt.title(f'{self.args.model.upper()} Training Curve') + plt.grid(True, alpha=0.3) + + plot_path = self.log_dir / 'training_curve.png' + plt.savefig(plot_path, dpi=150, bbox_inches='tight') + plt.close() + print(f"✓ Plot saved to {plot_path}") + + +def _get_model_config(args) -> dict: + """Extract model configuration from arguments.""" + return { + 'nfeature_dim': 4, + 'efeature_dim': 2, + 'output_dim': 4, + 'hidden_dim': args.hidden_dim, + 'n_gnn_layers': args.n_layers, + 'k': args.k, + 'dropout_rate': args.dropout, + } + + +def _build_gnn_model(model_name: str, config: dict): + """Build GNN models (MPN variants and GCN).""" + gnn_map = { + 'mpn': MPN, + 'skip_mpn': SkipMPN, + 'mask_embed_mpn': MaskEmbedMPN, + 'multi_mpn': MultiMPN, + 'mask_embed_multi_mpn': MaskEmbedMultiMPN, + 'mask_embed_multi_mpn_nomp': MaskEmbedMultiMPNNoMP, + 'mpn_simplenet': MPNSimplenet, + } + + if model_name in gnn_map: + return gnn_map[model_name](**config) + if model_name == 'multi_conv_net': + config['efeature_dim'] = 5 # MultiConvNet requires 5 edge features + return MultiConvNet(**config) + + raise ValueError(f"Unknown GNN model: {model_name}") + + +def _build_simple_model(model_name: str, args) -> nn.Cell: + """Build MLP or GCN models.""" + if model_name == 'mlp': + return MLPNet( + nfeature_dim=4, output_dim=4, + hidden_dim=args.hidden_dim, n_layers=args.n_layers, + dropout_rate=args.dropout + ) + if model_name == 'gcn': + 
return GCNNet( + nfeature_dim=4, output_dim=4, + hidden_dim=args.hidden_dim, n_gnn_layers=args.n_layers, + dropout_rate=args.dropout + ) + + raise ValueError(f"Unknown simple model: {model_name}") + + +def create_model(args) -> nn.Cell: + """ + Create model based on args. + + Models are divided into: + 1. Simple models (MLP, GCN): 4D input + 2. GNN models (MPN variants): 4D or 12D input + """ + # Try simple models first + simple = _build_simple_model(args.model, args) + if simple is not None: + return simple + + # Build GNN models + config = _get_model_config(args) + gnn = _build_gnn_model(args.model, config) + if gnn is not None: + return gnn + + raise ValueError(f"Unknown model: {args.model}") + + +def _setup_training_parser() -> argparse.ArgumentParser: + """Setup argument parser for training.""" + parser = argparse.ArgumentParser(description='Train PowerFlowNet with multiple dataset formats') + + parser.add_argument('--case', type=str, default='14', + help='Case name. Use v2 suffix (e.g., 14v2, 118v2) for V2 format') + parser.add_argument('--data-root', type=str, default='./data', help='Data root directory') + parser.add_argument('--model', type=str, default='mpn', + choices=['mlp', 'mpn', 'gcn', 'skip_mpn', 'mask_embed_mpn', + 'multi_mpn', 'mask_embed_multi_mpn', 'mask_embed_multi_mpn_nomp', + 'multi_conv_net', 'mpn_simplenet'], + help='Model type') + parser.add_argument('--hidden-dim', type=int, default=64, help='Hidden dimension') + parser.add_argument('--n-layers', type=int, default=3, help='Number of layers') + parser.add_argument('--k', type=int, default=3, help='k for TAGConv') + parser.add_argument('--dropout', type=float, default=0.1, help='Dropout rate') + parser.add_argument('--epochs', type=int, default=10, help='Number of epochs') + parser.add_argument('--batch-size', type=int, default=256, help='Batch size') + parser.add_argument('--lr', type=float, default=1e-3, help='Learning rate') + parser.add_argument('--patience', type=int, default=0, 
help='Early stopping patience') + parser.add_argument('--train-loss-fn', type=str, default='masked_l2', + choices=['masked_l2', 'power_imbalance', 'mixed_mse_power_imbalance'], + help='Training loss function') + parser.add_argument('--device', type=str, default='CPU', choices=['CPU', 'GPU', 'Ascend'], + help='Device to use') + + return parser + + +def _load_training_datasets(args): + """Load train/val/test datasets based on case format.""" + split = [0.7, 0.15, 0.15] + is_v2 = args.case.endswith('v2') + + if is_v2: + print(f"Using V2 dataset format for case '{args.case}'") + train_ds = PowerFlowDataV2(root=args.data_root, case=args.case, split=split, + task='train', normalize=True) + xymean, xystd, edgemean, edgestd = train_ds.get_normalization_stats() + val_ds = PowerFlowDataV2(root=args.data_root, case=args.case, split=split, + task='val', normalize=True, + xymean=xymean, xystd=xystd, edgemean=edgemean, edgestd=edgestd) + test_ds = PowerFlowDataV2(root=args.data_root, case=args.case, split=split, + task='test', normalize=True, + xymean=xymean, xystd=xystd, edgemean=edgemean, edgestd=edgestd) + train_loader = PowerFlowDataLoaderV2(train_ds, batch_size=args.batch_size, shuffle=True) + val_loader = PowerFlowDataLoaderV2(val_ds, batch_size=args.batch_size, shuffle=False) + test_loader = PowerFlowDataLoaderV2(test_ds, batch_size=args.batch_size, shuffle=False) + else: + print(f"Using legacy dataset format for case '{args.case}'") + train_ds = PowerFlowData(root=args.data_root, case=args.case, split=split, + task='train', normalize=True) + xymean, xystd, edgemean, edgestd = train_ds.get_data_means_stds() + val_ds = PowerFlowData(root=args.data_root, case=args.case, split=split, + task='val', normalize=True, + xymean=xymean, xystd=xystd, edgemean=edgemean, edgestd=edgestd) + test_ds = PowerFlowData(root=args.data_root, case=args.case, split=split, + task='test', normalize=True, + xymean=xymean, xystd=xystd, edgemean=edgemean, edgestd=edgestd) + train_loader = 
PowerFlowDataLoader(train_ds, batch_size=args.batch_size, shuffle=True) + val_loader = PowerFlowDataLoader(val_ds, batch_size=args.batch_size, shuffle=False) + test_loader = PowerFlowDataLoader(test_ds, batch_size=args.batch_size, shuffle=False) + + print("✓ Normalization stats computed from training set") + print(f"✓ Train: {len(train_ds)}, Val: {len(val_ds)}, Test: {len(test_ds)}") + + return train_loader, val_loader, test_loader, (xymean, xystd, edgemean, edgestd) + + +def _warn_12d_models(model_name: str) -> None: + """Warn about 12D model compatibility with 4D datasets.""" + models_12d = ['skip_mpn', 'mask_embed_mpn', 'multi_mpn', + 'mask_embed_multi_mpn_nomp', 'multi_conv_net'] + if model_name in models_12d: + print(f"\n⚠️ WARNING: Model '{model_name}' expects 12D input format:") + print(" [one-hot(4) + features(4) + mask(4)]") + print(" Current dataset provides 4D features only.\n") + + +def main(): + """Main training function.""" + parser = _setup_training_parser() + args = parser.parse_args() + + # Initialize device + init_device(args.device) + ms.set_context(mode=ms.PYNATIVE_MODE) + if args.device == 'Ascend': + try: + ms.set_context(jit_config={"jit_level": "O0"}) + print("✓ JIT level set to O0 for Ascend compatibility") + except Exception as e: + print(f"⚠ Could not set JIT config: {e}") + + print("\n📊 Loading datasets...") + train_loader, val_loader, test_loader, norm_stats = _load_training_datasets(args) + + _warn_12d_models(args.model) + + # Create model + print(f"\n🧠 Creating {args.model.upper()} model...") + model = create_model(args) + + num_params = sum(p.size for p in model.trainable_params()) + print(f"✓ Model has {num_params:,} trainable parameters") + + # Train + trainer = Trainer(model, train_loader, val_loader, test_loader, args, norm_stats=norm_stats) + trainer.train() + + print("✅ Training completed!") + + +if __name__ == '__main__': + main() -- Gitee