diff --git a/mindformers/parallel_core/transformer_config.py b/mindformers/parallel_core/transformer_config.py
index 297cdc5da60794b8938848b266fc86bf79ff2bb1..d88c3666ae54af761d3878a99e6e37dfdf5f6f0d 100644
--- a/mindformers/parallel_core/transformer_config.py
+++ b/mindformers/parallel_core/transformer_config.py
@@ -660,6 +660,17 @@ class TransformerConfig(ModelParallelConfig, MFModelConfig):
                 setattr(self, k, v)
             del self.rope_scaling
 
+        if self.position_embedding_type == "none":
+            self.nope_layer_interval = None
+
+        if self.nope_layer_interval is None:
+            pass
+        elif not isinstance(self.nope_layer_interval, int):
+            raise TypeError("nope_layer_interval must be a int, "
+                            f"but got {type(self.nope_layer_interval)}.")
+        elif self.nope_layer_interval <= 0:
+            raise ValueError("nope_layer_interval must be larger than 0.")
+
         if self.bias_swiglu_fusion and self.hidden_act != 'swiglu':
             raise ValueError(
                 "When using bias_swiglu_fusion, hidden_act must be swiglu."