From e862c8a3f6f78896856d9cb4f849f0e1aa1087d5 Mon Sep 17 00:00:00 2001
From: MNxyz
Date: Wed, 29 Oct 2025 15:10:39 +0000
Subject: [PATCH 1/3] add mindscience/models/neural_operator/readme.

Signed-off-by: MNxyz
---
 mindscience/models/neural_operator/readme | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 mindscience/models/neural_operator/readme

diff --git a/mindscience/models/neural_operator/readme b/mindscience/models/neural_operator/readme
new file mode 100644
index 000000000..e69de29bb
--
Gitee


From 7c86c271f2b799947ee74b8f6eb1f5f0f2db3a92 Mon Sep 17 00:00:00 2001
From: MNxyz
Date: Thu, 30 Oct 2025 10:19:33 +0000
Subject: [PATCH 2/3] update mindscience/models/neural_operator/afno2d.py. Add
 usage descriptions and examples

Signed-off-by: MNxyz
---
 mindscience/models/neural_operator/afno2d.py | 194 +++++++++++++++++--
 1 file changed, 175 insertions(+), 19 deletions(-)

diff --git a/mindscience/models/neural_operator/afno2d.py b/mindscience/models/neural_operator/afno2d.py
index 22873042c..c2551ba1b 100644
--- a/mindscience/models/neural_operator/afno2d.py
+++ b/mindscience/models/neural_operator/afno2d.py
@@ -12,7 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-'''Module providing afno2d'''
+'''Module providing afno2d
+This module implements AFNO2D and supporting components tailored for use with MindSpore.
+The implementation is intended to be used in vision / token-based feature pipelines.
+Only the docstrings in this file are explanatory; the code logic is unchanged.
+'''
 import numpy as np

 from mindspore import ops, nn, Tensor, Parameter
@@ -23,19 +27,35 @@ from ...sciops import RDFTn, IRDFTn


 class DropPath(nn.Cell):
-    """
+    r"""
+    Stochastic depth (DropPath) layer that randomly drops entire samples in a batch.

     Args:
+        drop_prob (float): Probability of dropping a sample (stochastic depth rate). Default: ``0.0``.
+        scale_by_keep (bool): Whether to scale kept samples by 1/(1-drop_prob) to preserve the expected value.
+            Default: ``True``.

     Inputs:
+        x (Tensor): Input tensor of shape :math:`(batch, ..., channels)` (any rank >= 2).
+            The mask is sampled per batch and broadcast across the remaining dimensions.

     Outputs:
+        Tensor: Same shape as the input `x`. Dropped samples are zeroed in their entirety across the
+            spatial/feature dimensions according to the DropPath mask. When `drop_prob` is 0.0, returns `x` unchanged.
+
+    Raises:
+        TypeError: If `drop_prob` is not a float or `scale_by_keep` is not a bool.

     Supported Platforms:
+        ``Ascend``

     Examples:
-
-
+        >>> from mindspore import Tensor
+        >>> import mindspore.common.dtype as mstype
+        >>> import numpy as np
+        >>> dp = DropPath(drop_prob=0.2)
+        >>> x = Tensor(np.ones([4, 8, 64]), mstype.float32)
+        >>> y = dp(x)
     """
     def __init__(self, drop_prob=0.0, scale_by_keep=True):
         super(DropPath, self).__init__()
@@ -56,19 +76,41 @@ class Mlp(nn.Cell):


 class Mlp(nn.Cell):
-    """
+    r"""
+    Feed-forward MLP block used inside transformer-like layers.
+
+    Architecture:
+        fc1 -> GELU -> dropout -> fc2 -> dropout

     Args:
+        embed_dims (int): Input and output embedding dimensionality.
+        mlp_ratio (int): Expansion ratio for the hidden layer (hidden = embed_dims * mlp_ratio).
+        dropout_rate (float): Dropout keep probability (MindSpore Dropout uses keep_prob). Default: ``1.0``.
+        compute_dtype (dtype.Number): Computation dtype for the Dense layers. Default: ``mstype.float16``.

     Inputs:
+        x (Tensor): Input tensor of shape :math:`(batch, seq\_len, embed\_dims)`.

     Outputs:
+        Tensor: Output tensor of shape :math:`(batch, seq\_len, embed\_dims)`, same as the input.
+
+    Raises:
+        TypeError: If `embed_dims` or `mlp_ratio` is not an int.
+        TypeError: If `dropout_rate` is not a float.
+        TypeError: If `compute_dtype` is not a valid MindSpore dtype.

     Supported Platforms:
+        ``Ascend``

     Examples:
-
-
+        >>> import numpy as np
+        >>> from mindspore import Tensor
+        >>> import mindspore.common.dtype as mstype
+        >>> mlp = Mlp(embed_dims=256, mlp_ratio=4, dropout_rate=0.9, compute_dtype=mstype.float16)
+        >>> x = Tensor(np.ones([2, 100, 256]), mstype.float32)
+        >>> out = mlp(x)
+        >>> print(out.shape)
+        (2, 100, 256)
     """
     def __init__(self,
                  embed_dims,
@@ -106,19 +148,47 @@ class AFNOBlock(nn.Cell):


 class AFNOBlock(nn.Cell):
-    """
+    r"""
+    AFNO block combining spectral AFNO2D filtering with a transformer-style residual MLP.
+
+    The block performs:
+        x -> LayerNorm -> AFNO2D -> +residual
+          -> LayerNorm -> MLP -> DropPath -> +residual

     Args:
+        embed_dims (int): Token embedding dimensionality.
+        mlp_ratio (int): Expansion ratio for the MLP hidden layer.
+        dropout_rate (float): Dropout keep probability. Default: ``1.0``.
+        drop_path (float): Stochastic depth drop probability (used if > 0). Default: ``0.0``.
+        h_size (int): Original input height in pixels (used to infer the AFNO grid height when combined with patch_size).
+        w_size (int): Original input width in pixels.
+        patch_size (int): Patch size used when reconstructing the spatial grid inside AFNO2D.
+        num_blocks (int): Number of channel blocks inside AFNO2D.
+        high_freq (bool): Enable the high-frequency pathway in AFNO2D. Default: ``False``.
+        compute_dtype (dtype.Number): Computation dtype for submodules. Default: ``mstype.float16``.

     Inputs:
+        x (Tensor): Token tensor of shape :math:`(batch, n\_patches, embed\_dims)`.

     Outputs:
+        Tensor: Output tensor of the same shape as the input: :math:`(batch, n\_patches, embed\_dims)`.
+
+    Raises:
+        TypeError: If numeric args are not of the expected types.
+        ValueError: If `embed_dims` is not divisible by `num_blocks`.

     Supported Platforms:
+        ``Ascend``

     Examples:
-
-
+        >>> import numpy as np
+        >>> from mindspore import Tensor
+        >>> import mindspore.common.dtype as mstype
+        >>> block = AFNOBlock(embed_dims=768, mlp_ratio=4, dropout_rate=0.9, h_size=128, w_size=256, patch_size=8)
+        >>> x = Tensor(np.ones([2, (128//8)*(256//8), 768]), mstype.float32)
+        >>> y = block(x)
+        >>> print(y.shape)
+        (2, 512, 768)
     """
     def __init__(self,
                  embed_dims,
@@ -163,19 +233,42 @@ class PatchEmbed(nn.Cell):


 class PatchEmbed(nn.Cell):
-    """
+    r"""
+    Patch embedding implemented via a Conv2d projection.
+
+    The convolution uses kernel_size = stride = patch_size to form non-overlapping patches.
+    Output tokens are returned as (batch, num_patches, embed_dims).

     Args:
+        in_channels (int): Number of input channels (e.g., 1 or 3).
+        embed_dims (int): Output embedding dimensionality per patch.
+        patch_size (int): Patch size used as the conv kernel and stride. Default: ``16``.
+        compute_dtype (dtype.Number): Computation dtype for the convolution. Default: ``mstype.float16``.

     Inputs:
+        x (Tensor): Input image tensor of shape :math:`(batch, in\_channels, H, W)`.
+            H and W must be divisible by `patch_size`.

     Outputs:
+        Tensor: Tensor of shape :math:`(batch, num\_patches, embed\_dims)` where
+            :math:`num\_patches = (H // patch\_size) * (W // patch\_size)`.
+
+    Raises:
+        TypeError: If `in_channels`, `embed_dims`, or `patch_size` are not ints.
+        ValueError: If H or W is not divisible by `patch_size` at runtime.

     Supported Platforms:
+        ``Ascend``

     Examples:
-
-
+        >>> import numpy as np
+        >>> from mindspore import Tensor
+        >>> import mindspore.common.dtype as mstype
+        >>> pe = PatchEmbed(in_channels=1, embed_dims=768, patch_size=16)
+        >>> img = Tensor(np.ones([2, 1, 128, 128]), mstype.float32)
+        >>> tokens = pe(img)
+        >>> print(tokens.shape)
+        (2, 64, 768)
     """
     def __init__(self,
                  in_channels,
@@ -201,19 +294,48 @@ class ForwardFeatures(nn.Cell):


 class ForwardFeatures(nn.Cell):
-    """
+    r"""
+    ForwardFeatures stacks patch embedding, learnable positional embeddings, and multiple AFNOBlocks.
+
+    The module maps raw input images/fields to a token sequence and then applies `depth`
+    AFNOBlock layers.

     Args:
+        grid_size (tuple[int, int]): Grid of patches as (H_patches, W_patches).
+        h_size (int): Original height in pixels (used to compute the AFNO internal grid).
+        w_size (int): Original width in pixels.
+        in_channels (int): Number of input channels (e.g., 1 or 3).
+        patch_size (int): Patch size used by PatchEmbed.
+        depth (int): Number of AFNOBlock layers to stack.
+        embed_dims (int): Embedding dimensionality of the tokens.
+        mlp_ratio (int): MLP expansion ratio inside AFNOBlock. Default: ``4``.
+        dropout_rate (float): Positional dropout keep probability. Default: ``1.0``.
+        compute_dtype (dtype.Number): Computation dtype for submodules. Default: ``mstype.float16``.

     Inputs:
+        x (Tensor): Input tensor of shape :math:`(batch, in\_channels, H, W)`.

     Outputs:
+        Tensor: Tensor of shape :math:`(batch, num\_patches, embed\_dims)` where
+            :math:`num\_patches = grid\_size[0] * grid\_size[1]`.
+
+    Raises:
+        TypeError: If `grid_size` is not a tuple of two ints.
+        TypeError: If `depth` or `embed_dims` is not an int.

     Supported Platforms:
+        ``Ascend``

     Examples:
-
-
+        >>> import numpy as np
+        >>> from mindspore import Tensor
+        >>> import mindspore.common.dtype as mstype
+        >>> ff = ForwardFeatures(grid_size=(16, 16), h_size=128, w_size=128, in_channels=1,
+        ...                      patch_size=8, depth=6, embed_dims=768)
+        >>> img = Tensor(np.ones([2, 1, 128, 128]), mstype.float32)
+        >>> tokens = ff(img)
+        >>> print(tokens.shape)
+        (2, 256, 768)
     """
     def __init__(self,
                  grid_size,
@@ -267,19 +389,53 @@ class AFNO2D(nn.Cell):


 class AFNO2D(nn.Cell):
-    """
+    r"""
+    Adaptive Fourier Neural Operator (AFNO) 2D implementation.
+
+    This cell applies learned, block-wise linear transforms in the Fourier domain over channel groups,
+    with an optional high-frequency pathway, sparsification, and an inverse transform to produce filtered tokens.
+
+    High-level steps:
+        1. Reshape input tokens to a spatial grid (b, h, w, c).
+        2. Apply a real DFT (RDFTn) -> (real, imag).
+        3. Partition the channel dimension into `num_blocks` blocks and apply complex linear maps
+           parameterized by weights (w1, w2) and biases (b1, b2).
+        4. Apply ReLU, zero out frequencies beyond `kept_modes`, further linear maps, softshrink sparsification.
+        5. Inverse RDFT (IRDFTn) and add the residual/bias.

     Args:
+        h_size (int): Height in tokens (number of vertical patches).
+        w_size (int): Width in tokens (number of horizontal patches).
+        mean (Union[Tensor, numpy.ndarray], optional): Optional mean for input normalization (kept as attribute). Default: ``None``.
+        std (Union[Tensor, numpy.ndarray], optional): Optional std for input normalization (kept as attribute). Default: ``None``.
+        embed_dims (int): Embedding dimensionality (number of channels). Default: ``768``.
+        num_blocks (int): Number of channel blocks used for the block-wise transforms. Default: ``8``.
+        high_freq (bool): Whether to enable a high-frequency pathway using a learnable Dense. Default: ``False``.
+        compute_dtype (dtype.Number): Computation dtype for parameters and transforms. Default: ``mstype.float16``.

     Inputs:
+        x (Tensor): Token tensor of shape :math:`(batch, n\_patches, embed\_dims)`.
+            Here :math:`n\_patches = h\_size * w\_size`.

     Outputs:
+        Tensor: Tensor of shape :math:`(batch, n\_patches, embed\_dims)`, the AFNO-filtered tokens.
+
+    Raises:
+        TypeError: If `h_size`, `w_size`, `embed_dims` or `num_blocks` are not ints.
+        ValueError: If `embed_dims` is not divisible by `num_blocks`.

     Supported Platforms:
+        ``Ascend``

     Examples:
-
-
+        >>> import numpy as np
+        >>> from mindspore import Tensor
+        >>> import mindspore.common.dtype as mstype
+        >>> afno = AFNO2D(h_size=16, w_size=16, embed_dims=768, num_blocks=8)
+        >>> tokens = Tensor(np.ones([2, 256, 768]), mstype.float32)  # 16*16=256 patches
+        >>> out = afno(tokens)
+        >>> print(out.shape)
+        (2, 256, 768)
     """
     def __init__(self,
                  h_size=32,
--
Gitee


From 4b565945e471af94e79ab2403bc868b801a173b1 Mon Sep 17 00:00:00 2001
From: MNxyz
Date: Thu, 30 Oct 2025 11:57:45 +0000
Subject: [PATCH 3/3] update mindscience/models/neural_operator/afno2d.py.

Signed-off-by: MNxyz
---
 mindscience/models/neural_operator/afno2d.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/mindscience/models/neural_operator/afno2d.py b/mindscience/models/neural_operator/afno2d.py
index c2551ba1b..f76431551 100644
--- a/mindscience/models/neural_operator/afno2d.py
+++ b/mindscience/models/neural_operator/afno2d.py
@@ -58,7 +58,7 @@ class DropPath(nn.Cell):
         >>> y = dp(x)
     """
     def __init__(self, drop_prob=0.0, scale_by_keep=True):
-        super(DropPath, self).__init__()
+        super().__init__()
         self.drop_prob = drop_prob
         self.keep_prob = 1.0 - self.drop_prob
         self.scale_by_keep = scale_by_keep
@@ -117,7 +117,7 @@ class Mlp(nn.Cell):
                  mlp_ratio,
                  dropout_rate=1.0,
                  compute_dtype=mstype.float16):
-        super(Mlp, self).__init__()
+        super().__init__()
         self.fc1 = nn.Dense(
             embed_dims, embed_dims * mlp_ratio,
             weight_init=initializer(
@@ -201,7 +201,7 @@ class AFNOBlock(nn.Cell):
                  num_blocks=8,
                  high_freq=False,
                  compute_dtype=mstype.float16):
-        super(AFNOBlock, self).__init__()
+        super().__init__()
         self.embed_dims = embed_dims

         self.layer_norm = nn.LayerNorm([embed_dims], epsilon=1e-6).to_float(compute_dtype)
@@ -275,7 +275,7 @@ class PatchEmbed(nn.Cell):
                  embed_dims,
                  patch_size=16,
                  compute_dtype=mstype.float16):
-        super(PatchEmbed, self).__init__()
+        super().__init__()
         self.compute_dtype = compute_dtype
         self.proj = nn.Conv2d(in_channels=in_channels,
                               out_channels=embed_dims,
@@ -348,7 +348,7 @@ class ForwardFeatures(nn.Cell):
                  mlp_ratio=4,
                  dropout_rate=1.0,
                  compute_dtype=mstype.float16):
-        super(ForwardFeatures, self).__init__()
+        super().__init__()
         self.patch_embed = PatchEmbed(
             in_channels, embed_dims, patch_size, compute_dtype=compute_dtype
         )
--
Gitee
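
A note on the DropPath semantics documented in PATCH 2: the mask is drawn once per sample and broadcast over every remaining axis, with optional rescaling by 1/(1-drop_prob) so the expected activation is unchanged. Below is a minimal NumPy sketch of that behavior (illustrative only; the rng, shapes, and variable names are assumptions, not the module's code):

import numpy as np

rng = np.random.default_rng(0)
drop_prob, scale_by_keep = 0.2, True
x = np.ones((4, 8, 64))  # (batch, tokens, channels), as in the docstring example

# One Bernoulli draw per sample, broadcast over the token/channel axes.
keep = (rng.random((x.shape[0], 1, 1)) >= drop_prob).astype(x.dtype)
if scale_by_keep:
    keep /= (1.0 - drop_prob)  # rescale survivors so the expectation of y matches x
y = x * keep  # dropped samples become all-zero; kept ones are scaled by 1/0.8

With drop_prob=0.0 the layer is the identity, matching the docstring's note that `x` is returned unchanged.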
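The AFNOBlock docstring gives the two-branch residual wiring as "x -> LayerNorm -> AFNO2D -> +residual -> LayerNorm -> MLP -> DropPath -> +residual". A minimal sketch of just that wiring, with hypothetical stand-in callables (filter_fn, mlp_fn, drop_path_fn) in place of the real submodules:

import numpy as np

def layer_norm(t, eps=1e-6):
    # stand-in for nn.LayerNorm over the channel axis
    return (t - t.mean(-1, keepdims=True)) / (t.std(-1, keepdims=True) + eps)

def afno_block(x, filter_fn, mlp_fn, drop_path_fn):
    # x: (batch, n_patches, embed_dims); order follows the docstring diagram
    x = x + filter_fn(layer_norm(x))              # spectral mixing branch + residual
    x = x + drop_path_fn(mlp_fn(layer_norm(x)))   # MLP branch with DropPath + residual
    return x

# identity stand-ins keep the sketch runnable; shapes match the doctest example
out = afno_block(np.ones((2, 512, 768)), lambda t: t, lambda t: t, lambda t: t)
print(out.shape)  # (2, 512, 768)

Per the documented diagram, DropPath acts only on the MLP branch, so a dropped sample still keeps the spectral branch's contribution through the first residual.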
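PatchEmbed's docstring points out that the Conv2d uses kernel_size = stride = patch_size, which makes the projection equivalent to slicing the image into non-overlapping patches and applying one linear map. A small NumPy check of that equivalence, reusing the shapes from the docstring example (the random `kernel` is a hypothetical stand-in for the conv weight):

import numpy as np

batch, cin, h, w, p, d = 2, 1, 128, 128, 16, 768
x = np.random.default_rng(1).standard_normal((batch, cin, h, w))
kernel = np.random.default_rng(2).standard_normal((d, cin * p * p))  # flattened conv weight

# Slice non-overlapping p x p patches and flatten each one to a vector ...
patches = (x.reshape(batch, cin, h // p, p, w // p, p)
             .transpose(0, 2, 4, 1, 3, 5)
             .reshape(batch, (h // p) * (w // p), cin * p * p))
# ... then a single matmul plays the role of the stride-p convolution.
tokens = patches @ kernel.T
print(tokens.shape)  # (2, 64, 768), matching the PatchEmbed doctest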
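The five "High-level steps" in the AFNO2D docstring can be mirrored in plain NumPy to make the data flow concrete. The sketch below is a loose approximation under explicit assumptions: it collapses the documented two-layer block-wise map (w1/w2, b1/b2) into a single random complex w1, applies ReLU to the real and imaginary parts separately, and models `kept_modes` as a cutoff along the rfft axis only. It is not the MindSpore implementation, and `hard_threshold_fraction`/`sparsity` are hypothetical names:

import numpy as np

def afno2d_filter_sketch(x, num_blocks=8, hard_threshold_fraction=1.0, sparsity=0.01):
    """x: real array (batch, h, w, c); step 1's token->grid reshape is assumed done."""
    b, h, w, c = x.shape
    block = c // num_blocks
    # Step 2: real 2D FFT over the spatial axes -> complex spectrum (b, h, w//2+1, c).
    spec = np.fft.rfft2(x, axes=(1, 2))
    # Step 3: split channels into blocks and mix each block with a complex linear map.
    spec = spec.reshape(b, h, w // 2 + 1, num_blocks, block)
    rng = np.random.default_rng(0)
    w1 = 0.02 * (rng.standard_normal((num_blocks, block, block))
                 + 1j * rng.standard_normal((num_blocks, block, block)))
    mixed = np.einsum('bhwnk,nkl->bhwnl', spec, w1)
    # Step 4: ReLU on real/imag parts, keep only the lowest frequencies, softshrink.
    mixed = np.maximum(mixed.real, 0) + 1j * np.maximum(mixed.imag, 0)
    kept_modes = int((w // 2 + 1) * hard_threshold_fraction)
    mixed[:, :, kept_modes:] = 0
    mixed = (np.sign(mixed.real) * np.maximum(np.abs(mixed.real) - sparsity, 0)
             + 1j * np.sign(mixed.imag) * np.maximum(np.abs(mixed.imag) - sparsity, 0))
    # Step 5: inverse real FFT back to the grid, plus the residual connection.
    out = np.fft.irfft2(mixed.reshape(b, h, w // 2 + 1, c), s=(h, w), axes=(1, 2))
    return out + x

print(afno2d_filter_sketch(np.ones((2, 16, 16, 768))).shape)  # (2, 16, 16, 768)

Mapping the filtered grid back to (batch, n_patches, embed_dims) tokens is then just step 1's reshape run in reverse.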