From 15c95721104303b1d0cd4bdead4c6042b913289b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=AD=8F=E7=90=A2=E8=89=BA?=
Date: Mon, 28 Apr 2025 10:50:40 +0800
Subject: [PATCH] actual_seq_len fix

---
 mindformers/dataset/dataloader/blended_megatron_dataloader.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mindformers/dataset/dataloader/blended_megatron_dataloader.py b/mindformers/dataset/dataloader/blended_megatron_dataloader.py
index 7606a14c1..2baf27551 100644
--- a/mindformers/dataset/dataloader/blended_megatron_dataloader.py
+++ b/mindformers/dataset/dataloader/blended_megatron_dataloader.py
@@ -191,6 +191,7 @@ class MegatronDatasetBuilder:
             eod_mask_loss=self.config.get("eod_mask_loss", False),
             create_attention_mask=self.config.get("create_attention_mask", True),
             create_compressed_eod_mask=self.config.get("create_compressed_eod_mask", False),
+            eod_pad_length=self.config.get("eod_pad_length", False),
             s3_cache_path=self.config.get("s3_cache_path", None),
             drop_last_partial_validation_sequence=self.config.get("drop_last_partial_validation_sequence", True),
             add_extra_token_to_sequence=self.config.get("add_extra_token_to_sequence", True),
-- 
Gitee