diff --git a/mindscience/models/neural_operator/afno2d.py b/mindscience/models/neural_operator/afno2d.py
index 22873042cb57187c3e8b013cf389f73e57e9e82c..f76431551664ecd818748d154e7a54bbaefca1d5 100644
--- a/mindscience/models/neural_operator/afno2d.py
+++ b/mindscience/models/neural_operator/afno2d.py
@@ -12,7 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-'''Module providing afno2d'''
+'''Module providing afno2d.
+This module implements AFNO2D and its supporting components for MindSpore.
+The implementation is intended for vision / token-based feature pipelines and
+provides the DropPath, Mlp, AFNOBlock, PatchEmbed, ForwardFeatures and AFNO2D cells.
+'''
 import numpy as np
 from mindspore import ops, nn, Tensor, Parameter
 
@@ -23,22 +27,38 @@ from ...sciops import RDFTn, IRDFTn
 
 class DropPath(nn.Cell):
-    """
+    r"""
+    Stochastic depth (DropPath) layer that randomly drops entire samples in a batch.
 
     Args:
+        drop_prob (float): Probability of dropping a sample (stochastic depth rate). Default: ``0.0``.
+        scale_by_keep (bool): Whether to scale kept samples by 1/(1 - drop_prob) to preserve the expected value.
+            Default: ``True``.
 
     Inputs:
+        x (Tensor): Input tensor of shape :math:`(batch, ..., channels)` (any rank >= 2).
+            The mask is sampled per batch element and broadcast across the remaining dimensions.
 
     Outputs:
+        Tensor: Same shape as input `x`. Entire samples (all entries across the spatial/feature dims)
+        are zeroed according to the DropPath mask. When `drop_prob` is 0.0, `x` is returned unchanged.
+
+    Raises:
+        TypeError: If `drop_prob` is not a float or `scale_by_keep` is not a bool.
 
     Supported Platforms:
+        ``Ascend``
 
     Examples:
-
-
+        >>> from mindspore import Tensor
+        >>> import mindspore.common.dtype as mstype
+        >>> import numpy as np
+        >>> dp = DropPath(drop_prob=0.2)
+        >>> x = Tensor(np.ones([4, 8, 64]), mstype.float32)
+        >>> y = dp(x)
     """
 
     def __init__(self, drop_prob=0.0, scale_by_keep=True):
-        super(DropPath, self).__init__()
+        super().__init__()
         self.drop_prob = drop_prob
         self.keep_prob = 1.0 - self.drop_prob
         self.scale_by_keep = scale_by_keep
@@ -56,26 +76,48 @@
 
 class Mlp(nn.Cell):
-    """
+    r"""
+    Feed-forward MLP block used inside transformer-like layers.
+
+    Architecture:
+        fc1 -> GELU -> dropout -> fc2 -> dropout
 
     Args:
+        embed_dims (int): Input and output embedding dimensionality.
+        mlp_ratio (int): Expansion ratio for the hidden layer (hidden = embed_dims * mlp_ratio).
+        dropout_rate (float): Keep probability for dropout (MindSpore ``nn.Dropout`` takes a keep probability). Default: ``1.0``.
+        compute_dtype (dtype.Number): Computation dtype for the Dense layers. Default: ``mstype.float16``.
 
     Inputs:
+        x (Tensor): Input tensor of shape :math:`(batch, seq\_len, embed\_dims)`.
 
     Outputs:
+        Tensor: Output tensor of shape :math:`(batch, seq\_len, embed\_dims)`, same as the input.
+
+    Raises:
+        TypeError: If `embed_dims` or `mlp_ratio` is not an int.
+        TypeError: If `dropout_rate` is not a float.
+        TypeError: If `compute_dtype` is not a valid MindSpore dtype.
 
     Supported Platforms:
+        ``Ascend``
 
     Examples:
-
-
+        >>> import numpy as np
+        >>> from mindspore import Tensor
+        >>> import mindspore.common.dtype as mstype
+        >>> mlp = Mlp(embed_dims=256, mlp_ratio=4, dropout_rate=0.9, compute_dtype=mstype.float16)
+        >>> x = Tensor(np.ones([2, 100, 256]), mstype.float32)
+        >>> out = mlp(x)
+        >>> print(out.shape)
+        (2, 100, 256)
     """
     def __init__(self,
                  embed_dims,
                  mlp_ratio,
                  dropout_rate=1.0,
                  compute_dtype=mstype.float16):
-        super(Mlp, self).__init__()
+        super().__init__()
         self.fc1 = nn.Dense(
             embed_dims,
             embed_dims * mlp_ratio,
             weight_init=initializer(
@@ -106,19 +148,47 @@
 
 class AFNOBlock(nn.Cell):
-    """
+    r"""
+    AFNO block combining spectral AFNO2D filtering with a transformer-style residual MLP.
+
+    The block performs:
+        x -> LayerNorm -> AFNO2D -> +residual
+          -> LayerNorm -> MLP -> DropPath -> +residual
 
     Args:
+        embed_dims (int): Token embedding dimensionality.
+        mlp_ratio (int): Expansion ratio for the MLP hidden layer.
+        dropout_rate (float): Dropout keep probability. Default: ``1.0``.
+        drop_path (float): Stochastic depth drop probability (used if > 0). Default: ``0.0``.
+        h_size (int): Original input height in pixels; together with `patch_size` it determines
+            the AFNO grid height.
+        w_size (int): Original input width in pixels.
+        patch_size (int): Patch size used when reconstructing the spatial grid inside AFNO2D.
+        num_blocks (int): Number of channel blocks inside AFNO2D.
+        high_freq (bool): Enable the high-frequency pathway in AFNO2D. Default: ``False``.
+        compute_dtype (dtype.Number): Computation dtype for submodules. Default: ``mstype.float16``.
 
     Inputs:
+        x (Tensor): Token tensor of shape :math:`(batch, n\_patches, embed\_dims)`.
 
     Outputs:
+        Tensor: Output tensor of the same shape as the input: :math:`(batch, n\_patches, embed\_dims)`.
+
+    Raises:
+        TypeError: If the numeric arguments are not of the expected types.
+        ValueError: If `embed_dims` is not divisible by `num_blocks`.
 
     Supported Platforms:
+        ``Ascend``
 
     Examples:
-
-
+        >>> import numpy as np
+        >>> from mindspore import Tensor
+        >>> import mindspore.common.dtype as mstype
+        >>> block = AFNOBlock(embed_dims=768, mlp_ratio=4, dropout_rate=0.9, h_size=128, w_size=256, patch_size=8)
+        >>> x = Tensor(np.ones([2, (128//8)*(256//8), 768]), mstype.float32)
+        >>> y = block(x)
+        >>> print(y.shape)
+        (2, 512, 768)
     """
 
     def __init__(self,
                  embed_dims,
@@ -131,7 +201,7 @@ class AFNOBlock(nn.Cell):
                  num_blocks=8,
                  high_freq=False,
                  compute_dtype=mstype.float16):
-        super(AFNOBlock, self).__init__()
+        super().__init__()
         self.embed_dims = embed_dims
 
         self.layer_norm = nn.LayerNorm([embed_dims], epsilon=1e-6).to_float(compute_dtype)
@@ -163,26 +233,49 @@
 
 class PatchEmbed(nn.Cell):
-    """
+    r"""
+    Patch embedding implemented as a Conv2d projection.
+
+    The convolution uses kernel_size = stride = patch_size to form non-overlapping patches.
+    Output tokens are returned as (batch, num_patches, embed_dims).
 
     Args:
+        in_channels (int): Number of input channels (e.g., 1 or 3).
+        embed_dims (int): Output embedding dimensionality per patch.
+        patch_size (int): Patch size used as the conv kernel and stride. Default: ``16``.
+        compute_dtype (dtype.Number): Computation dtype for the convolution. Default: ``mstype.float16``.
 
     Inputs:
+        x (Tensor): Input image tensor of shape :math:`(batch, in\_channels, H, W)`.
+            H and W must be divisible by `patch_size`.
 
     Outputs:
+        Tensor: Tensor of shape :math:`(batch, num\_patches, embed\_dims)` where
+        :math:`num\_patches = (H // patch\_size) * (W // patch\_size)`.
+
+    Raises:
+        TypeError: If `in_channels`, `embed_dims`, or `patch_size` is not an int.
+        ValueError: If H or W is not divisible by `patch_size` at runtime.
 
     Supported Platforms:
+        ``Ascend``
 
     Examples:
-
-
+        >>> import numpy as np
+        >>> from mindspore import Tensor
+        >>> import mindspore.common.dtype as mstype
+        >>> pe = PatchEmbed(in_channels=1, embed_dims=768, patch_size=16)
+        >>> img = Tensor(np.ones([2, 1, 128, 128]), mstype.float32)
+        >>> tokens = pe(img)
+        >>> print(tokens.shape)
+        (2, 64, 768)
     """
 
     def __init__(self,
                  in_channels,
                  embed_dims,
                  patch_size=16,
                  compute_dtype=mstype.float16):
-        super(PatchEmbed, self).__init__()
+        super().__init__()
         self.compute_dtype = compute_dtype
         self.proj = nn.Conv2d(in_channels=in_channels,
                               out_channels=embed_dims,
@@ -201,19 +294,48 @@
 
 class ForwardFeatures(nn.Cell):
-    """
+    r"""
+    ForwardFeatures stacks patch embedding, learnable positional embeddings and multiple AFNOBlocks.
+
+    The module maps raw input images/fields to a token sequence and then applies `depth`
+    AFNOBlock layers.
 
     Args:
+        grid_size (tuple[int, int]): Grid of patches as (H_patches, W_patches).
+        h_size (int): Original height in pixels (used to compute the AFNO internal grid).
+        w_size (int): Original width in pixels.
+        in_channels (int): Number of input channels (e.g., 1 or 3).
+        patch_size (int): Patch size used by PatchEmbed.
+        depth (int): Number of AFNOBlock layers to stack.
+        embed_dims (int): Embedding dimensionality of the tokens.
+        mlp_ratio (int): MLP expansion ratio inside AFNOBlock. Default: ``4``.
+        dropout_rate (float): Positional dropout keep probability. Default: ``1.0``.
+        compute_dtype (dtype.Number): Computation dtype for submodules. Default: ``mstype.float16``.
 
     Inputs:
+        x (Tensor): Input tensor of shape :math:`(batch, in\_channels, H, W)`.
 
     Outputs:
+        Tensor: Tensor of shape :math:`(batch, num\_patches, embed\_dims)` where
+        :math:`num\_patches = grid\_size[0] * grid\_size[1]`.
+
+    Raises:
+        TypeError: If `grid_size` is not a tuple of two ints.
+        TypeError: If `depth` or `embed_dims` is not an int.
 
     Supported Platforms:
+        ``Ascend``
 
     Examples:
-
-
+        >>> import numpy as np
+        >>> from mindspore import Tensor
+        >>> import mindspore.common.dtype as mstype
+        >>> ff = ForwardFeatures(grid_size=(16, 16), h_size=128, w_size=128, in_channels=1,
+        ...                      patch_size=8, depth=6, embed_dims=768)
+        >>> img = Tensor(np.ones([2, 1, 128, 128]), mstype.float32)
+        >>> tokens = ff(img)
+        >>> print(tokens.shape)
+        (2, 256, 768)
     """
 
     def __init__(self,
                  grid_size,
                  h_size,
                  w_size,
                  in_channels,
                  patch_size,
                  depth,
                  embed_dims,
@@ -226,7 +348,7 @@
                  mlp_ratio=4,
                  dropout_rate=1.0,
                  compute_dtype=mstype.float16):
-        super(ForwardFeatures, self).__init__()
+        super().__init__()
         self.patch_embed = PatchEmbed(
             in_channels, embed_dims, patch_size, compute_dtype=compute_dtype
         )
@@ -267,19 +389,53 @@
 
 class AFNO2D(nn.Cell):
-    """
+    r"""
+    Adaptive Fourier Neural Operator (AFNO) 2D implementation.
+
+    This cell applies learned, block-wise linear transforms in the Fourier domain over channel
+    groups, with an optional high-frequency pathway and sparsification, followed by an inverse
+    transform that produces the filtered tokens.
+
+    High-level steps:
+        1. Reshape the input tokens to a spatial grid (b, h, w, c).
+        2. Apply the real DFT (RDFTn) -> (real, imag).
+        3. Partition the channel dimension into `num_blocks` blocks and apply complex linear maps
+           parameterized by the weights (w1, w2) and biases (b1, b2).
+        4. Apply ReLU, zero out frequencies beyond `kept_modes`, apply further linear maps, then
+           softshrink sparsification.
+        5. Apply the inverse RDFT (IRDFTn) and add the residual/bias.
 
     Args:
+        h_size (int): Height in tokens (number of vertical patches). Default: ``32``.
+        w_size (int): Width in tokens (number of horizontal patches).
+        mean (Union[Tensor, ndarray], optional): Mean used for input normalization (kept as an attribute). Default: ``None``.
+        std (Union[Tensor, ndarray], optional): Std used for input normalization (kept as an attribute). Default: ``None``.
+        embed_dims (int): Embedding dimensionality (number of channels). Default: ``768``.
+        num_blocks (int): Number of channel blocks used for the block-wise transforms. Default: ``8``.
+        high_freq (bool): Whether to enable a high-frequency pathway using a learnable Dense. Default: ``False``.
+        compute_dtype (dtype.Number): Computation dtype for parameters and transforms. Default: ``mstype.float16``.
 
     Inputs:
+        x (Tensor): Token tensor of shape :math:`(batch, n\_patches, embed\_dims)`.
+            Here :math:`n\_patches = h\_size * w\_size`.
 
     Outputs:
+        Tensor: Tensor of shape :math:`(batch, n\_patches, embed\_dims)`, the AFNO-filtered tokens.
+
+    Raises:
+        TypeError: If `h_size`, `w_size`, `embed_dims` or `num_blocks` is not an int.
+        ValueError: If `embed_dims` is not divisible by `num_blocks`.
 
     Supported Platforms:
+        ``Ascend``
 
     Examples:
-
-
+        >>> import numpy as np
+        >>> from mindspore import Tensor
+        >>> import mindspore.common.dtype as mstype
+        >>> afno = AFNO2D(h_size=16, w_size=16, embed_dims=768, num_blocks=8)
+        >>> tokens = Tensor(np.ones([2, 256, 768]), mstype.float32)  # 16*16 = 256 patches
+        >>> out = afno(tokens)
+        >>> print(out.shape)
+        (2, 256, 768)
     """
 
     def __init__(self,
                  h_size=32,
diff --git a/mindscience/models/neural_operator/readme b/mindscience/models/neural_operator/readme
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
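
Usage sketch (not part of the patch): a minimal end-to-end run of the cells documented above, replaying the ForwardFeatures docstring example as a standalone script. The import path is an assumption derived from the file location in this diff.

    import numpy as np
    import mindspore.common.dtype as mstype
    from mindspore import Tensor
    # Assumed import path, based on the file layout shown in this diff.
    from mindscience.models.neural_operator.afno2d import ForwardFeatures

    # A 128x128 single-channel field with 8x8 patches yields a 16x16 = 256 token grid.
    model = ForwardFeatures(grid_size=(16, 16), h_size=128, w_size=128, in_channels=1,
                            patch_size=8, depth=6, embed_dims=768)
    x = Tensor(np.ones([2, 1, 128, 128]), mstype.float32)
    tokens = model(x)
    print(tokens.shape)  # (2, 256, 768): (batch, num_patches, embed_dims)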