diff --git a/mindformers/parallel_core/transformer_config_utils.py b/mindformers/parallel_core/transformer_config_utils.py index 21b8407fc5aad4df89fa0573545c0edf06f0b268..91bb459561be6f7eb3a5cf34674a487d78b61337 100644 --- a/mindformers/parallel_core/transformer_config_utils.py +++ b/mindformers/parallel_core/transformer_config_utils.py @@ -191,6 +191,7 @@ DEFAULT_WHITE_KEY.update({ 'monitor_config', 'dataset_config', 'multiple_of', 'ffn_dim_multiplier', 'qkv_concat', 'use_past', 'scaling_factor', 'input_sliced_sig', 'return_extra_loss', 'moe_config' }) +DEFAULT_WHITE_KEY.discard('pad_token_id') # pad_token_id is used in training to generate the attention mask ERROR_LOG = {}