From 4b49e2887477f73d207f87ff8cd6051e9331647f Mon Sep 17 00:00:00 2001
From: donghaoran
Date: Tue, 10 Dec 2024 11:00:07 +0800
Subject: [PATCH] refactor files construction

---
 mindspeed_llm/core/datasets/gpt_dataset.py    |  2 +-
 mindspeed_llm/legacy/data/data_samplers.py    |  2 +-
 mindspeed_llm/legacy/model/transformer.py     |  4 +--
 .../tasks/evaluation/eval_impl/agi_eval.py    |  6 ++--
 .../tasks/evaluation/eval_impl/bbh_eval.py    |  6 ++--
 .../tasks/evaluation/eval_impl/boolq_eval.py  |  6 ++--
 .../tasks/evaluation/eval_impl/ceval_exam.py  |  6 ++--
 .../tasks/evaluation/eval_impl/gsm8k_eval.py  |  6 ++--
 .../tasks/evaluation/eval_impl/human_eval.py  |  8 ++---
 .../tasks/evaluation/eval_impl/mmlu_eval.py   |  6 ++--
 mindspeed_llm/tasks/megatron_adaptor.py       |  2 +-
 mindspeed_llm/tasks/models/mask_generator.py  |  0
 .../tasks/models/transformer/fast_mlp.py      |  2 +-
 mindspeed_llm/tasks/post_train/__init__.py    | 14 ---------
 .../tasks/{data => posttrain}/__init__.py     |  0
 .../base/__init__.py                          |  0
 .../base/base_trainer.py                      |  2 +-
 .../{post_train => posttrain}/dpo/__init__.py |  0
 .../dpo/dpo_model.py                          |  2 +-
 .../dpo/dpo_trainer.py                        |  4 +--
 .../dpo/simpo_trainer.py                      |  4 +--
 .../{post_train => posttrain}/launcher.py     |  8 ++---
 .../{finetune => posttrain/lora}/__init__.py  |  0
 .../lora/cc_lora_forward.py                   |  0
 .../{finetune => posttrain}/lora/lora_moe.py  |  0
 .../{finetune => posttrain}/lora/utils.py     |  0
 .../{post_train => posttrain}/rm/__init__.py  |  0
 .../{post_train => posttrain}/rm/rm_model.py  |  0
 .../rm/rm_trainer.py                          |  5 ++--
 .../{post_train => posttrain}/sft/__init__.py |  0
 .../sft/sft_trainer.py                        |  2 +-
 .../tasks/{post_train => posttrain}/utils.py  |  0
 .../tasks/{data => preprocess}/collator.py    |  0
 .../preprocess/decoder_packed_mtf_dataset.py  |  2 +-
 mindspeed_llm/tasks/preprocess/mtf_dataset.py |  2 +-
 mindspeed_llm/tasks/trainer/launcher.py       | 30 -------------------
 .../{finetune/lora => utils}/__init__.py      |  0
 .../tasks/{ => utils}/error_utils.py          |  0
 mindspeed_llm/training/checkpointing.py       |  4 +--
 mindspeed_llm/training/initialize.py          |  2 +-
 mindspeed_llm/training/training.py            |  6 ++--
 posttrain_gpt.py                              |  2 +-
 42 files changed, 50 insertions(+), 95 deletions(-)
 delete mode 100644 mindspeed_llm/tasks/models/mask_generator.py
 delete mode 100644 mindspeed_llm/tasks/post_train/__init__.py
 rename mindspeed_llm/tasks/{data => posttrain}/__init__.py (100%)
 rename mindspeed_llm/tasks/{post_train => posttrain}/base/__init__.py (100%)
 rename mindspeed_llm/tasks/{post_train => posttrain}/base/base_trainer.py (99%)
 rename mindspeed_llm/tasks/{post_train => posttrain}/dpo/__init__.py (100%)
 rename mindspeed_llm/tasks/{post_train => posttrain}/dpo/dpo_model.py (98%)
 rename mindspeed_llm/tasks/{post_train => posttrain}/dpo/dpo_trainer.py (99%)
 rename mindspeed_llm/tasks/{post_train => posttrain}/dpo/simpo_trainer.py (98%)
 rename mindspeed_llm/tasks/{post_train => posttrain}/launcher.py (85%)
 rename mindspeed_llm/tasks/{finetune => posttrain/lora}/__init__.py (100%)
 rename mindspeed_llm/tasks/{finetune => posttrain}/lora/cc_lora_forward.py (100%)
 rename mindspeed_llm/tasks/{finetune => posttrain}/lora/lora_moe.py (100%)
 rename mindspeed_llm/tasks/{finetune => posttrain}/lora/utils.py (100%)
 rename mindspeed_llm/tasks/{post_train => posttrain}/rm/__init__.py (100%)
 rename mindspeed_llm/tasks/{post_train => posttrain}/rm/rm_model.py (100%)
 rename mindspeed_llm/tasks/{post_train => posttrain}/rm/rm_trainer.py (98%)
 rename mindspeed_llm/tasks/{post_train => posttrain}/sft/__init__.py (100%)
 rename mindspeed_llm/tasks/{post_train => posttrain}/sft/sft_trainer.py (98%)
 rename mindspeed_llm/tasks/{post_train => posttrain}/utils.py (100%)
 rename mindspeed_llm/tasks/{data => preprocess}/collator.py (100%)
 delete mode 100644 mindspeed_llm/tasks/trainer/launcher.py
 rename mindspeed_llm/tasks/{finetune/lora => utils}/__init__.py (100%)
 rename mindspeed_llm/tasks/{ => utils}/error_utils.py (100%)

diff --git a/mindspeed_llm/core/datasets/gpt_dataset.py b/mindspeed_llm/core/datasets/gpt_dataset.py
index 7fe141d64..4ac4617c0 100644
--- a/mindspeed_llm/core/datasets/gpt_dataset.py
+++ b/mindspeed_llm/core/datasets/gpt_dataset.py
@@ -12,7 +12,7 @@ from megatron.core.datasets.utils import Split, log_single_rank
 from megatron.core.datasets.gpt_dataset import (_build_document_index,
                                                 _build_shuffle_index
                                                 )
-from mindspeed_llm.tasks.error_utils import GPTDatasetSampleIndexError
+from mindspeed_llm.tasks.utils.error_utils import GPTDatasetSampleIndexError
 from .blended_megatron_dataset_builder import need_to_build_dataset
 
 logger = logging.getLogger(__name__)
diff --git a/mindspeed_llm/legacy/data/data_samplers.py b/mindspeed_llm/legacy/data/data_samplers.py
index 9265f7b82..adf7cf132 100644
--- a/mindspeed_llm/legacy/data/data_samplers.py
+++ b/mindspeed_llm/legacy/data/data_samplers.py
@@ -23,7 +23,7 @@ from transformers import DataCollatorForSeq2Seq
 from megatron.training import get_args, get_tokenizer
 from megatron.core import parallel_state
 from megatron.legacy.data.data_samplers import MegatronPretrainingSampler, MegatronPretrainingRandomSampler
-from mindspeed_llm.tasks.data.collator import PairwiseDataCollatorWithPadding
+from mindspeed_llm.tasks.preprocess.collator import PairwiseDataCollatorWithPadding
 
 
 def build_pretraining_data_loader(dataset, consumed_samples):
diff --git a/mindspeed_llm/legacy/model/transformer.py b/mindspeed_llm/legacy/model/transformer.py
index 02e712c1a..1987a6fa4 100644
--- a/mindspeed_llm/legacy/model/transformer.py
+++ b/mindspeed_llm/legacy/model/transformer.py
@@ -44,9 +44,9 @@ from mindspeed.model.transformer import get_attention_mask
 from mindspeed_llm.core.transformer.mlp import should_recompute_activation
 from mindspeed_llm.core.models.common.embeddings.rotary_pos_embedding import apply_rotary_pos_emb
-from mindspeed_llm.tasks.error_utils import ensure_valid
+from mindspeed_llm.tasks.utils.error_utils import ensure_valid
 from mindspeed_llm.tasks.models.common.alibi import Alibi
-from mindspeed_llm.tasks.finetune.lora.utils import is_enable_lora
+from mindspeed_llm.tasks.posttrain.lora.utils import is_enable_lora
 
 
 def state_dict_for_save_checkpoint(state_dict):
diff --git a/mindspeed_llm/tasks/evaluation/eval_impl/agi_eval.py b/mindspeed_llm/tasks/evaluation/eval_impl/agi_eval.py
index 823a5c6b7..626e4dfa9 100644
--- a/mindspeed_llm/tasks/evaluation/eval_impl/agi_eval.py
+++ b/mindspeed_llm/tasks/evaluation/eval_impl/agi_eval.py
@@ -22,9 +22,9 @@ import tqdm
 from torch import distributed as dist
 
 from .template import AGIEVAL_TEMPLATE_DIR
-from ..eval_api.dataset_eval import DatasetEval
-from ..eval_api.chat import Chat
-from ...error_utils import check_divisible_by_zero
+from mindspeed_llm.tasks.evaluation.eval_api.dataset_eval import DatasetEval
+from mindspeed_llm.tasks.evaluation.eval_api.chat import Chat
+from mindspeed_llm.tasks.utils.error_utils import check_divisible_by_zero
 
 logger = logging.getLogger(__name__)
diff --git a/mindspeed_llm/tasks/evaluation/eval_impl/bbh_eval.py b/mindspeed_llm/tasks/evaluation/eval_impl/bbh_eval.py
index 1168957d4..10c5ec6e3 100644
--- a/mindspeed_llm/tasks/evaluation/eval_impl/bbh_eval.py
+++ b/mindspeed_llm/tasks/evaluation/eval_impl/bbh_eval.py
@@ -25,9 +25,9 @@ from torch import distributed as dist
 
 from megatron.training import get_args
 from mindspeed_llm.tasks.preprocess.templates import Role
 from .template import BBH_TEMPLATE_DIR, get_eval_template
-from ..eval_api.dataset_eval import DatasetEval
-from ..eval_api.chat import Chat
-from ...error_utils import check_divisible_by_zero
+from mindspeed_llm.tasks.evaluation.eval_api.dataset_eval import DatasetEval
+from mindspeed_llm.tasks.evaluation.eval_api.chat import Chat
+from mindspeed_llm.tasks.utils.error_utils import check_divisible_by_zero
 
 logger = logging.getLogger(__name__)
diff --git a/mindspeed_llm/tasks/evaluation/eval_impl/boolq_eval.py b/mindspeed_llm/tasks/evaluation/eval_impl/boolq_eval.py
index 7843182f4..547cf1903 100644
--- a/mindspeed_llm/tasks/evaluation/eval_impl/boolq_eval.py
+++ b/mindspeed_llm/tasks/evaluation/eval_impl/boolq_eval.py
@@ -20,9 +20,9 @@ import pandas as pd
 import tqdm
 from torch import distributed as dist
 
-from ..eval_api.dataset_eval import DatasetEval
-from ..eval_api.chat import Chat
-from ...error_utils import check_divisible_by_zero
+from mindspeed_llm.tasks.evaluation.eval_api.dataset_eval import DatasetEval
+from mindspeed_llm.tasks.evaluation.eval_api.chat import Chat
+from mindspeed_llm.tasks.utils.error_utils import check_divisible_by_zero
 
 logger = logging.getLogger(__name__)
diff --git a/mindspeed_llm/tasks/evaluation/eval_impl/ceval_exam.py b/mindspeed_llm/tasks/evaluation/eval_impl/ceval_exam.py
index b931e60f4..ff332425d 100644
--- a/mindspeed_llm/tasks/evaluation/eval_impl/ceval_exam.py
+++ b/mindspeed_llm/tasks/evaluation/eval_impl/ceval_exam.py
@@ -23,9 +23,9 @@ import tqdm
 from torch import distributed as dist
 
 from .template import CEVAL_TEMPLATE_DIR, get_eval_template
-from ..eval_api.dataset_eval import DatasetEval
-from ..eval_api.chat import Chat
-from ...error_utils import check_divisible_by_zero
+from mindspeed_llm.tasks.evaluation.eval_api.dataset_eval import DatasetEval
+from mindspeed_llm.tasks.evaluation.eval_api.chat import Chat
+from mindspeed_llm.tasks.utils.error_utils import check_divisible_by_zero
 
 logger = logging.getLogger(__name__)
diff --git a/mindspeed_llm/tasks/evaluation/eval_impl/gsm8k_eval.py b/mindspeed_llm/tasks/evaluation/eval_impl/gsm8k_eval.py
index 34d9c73db..7ebbbe342 100644
--- a/mindspeed_llm/tasks/evaluation/eval_impl/gsm8k_eval.py
+++ b/mindspeed_llm/tasks/evaluation/eval_impl/gsm8k_eval.py
@@ -22,9 +22,9 @@ import tqdm
 from torch import distributed as dist
 
 from .template import GSM8K_TEMPLATE_DIR
-from ..eval_api.dataset_eval import DatasetEval
-from ..eval_api.chat import Chat
-from ...error_utils import check_divisible_by_zero
+from mindspeed_llm.tasks.evaluation.eval_api.dataset_eval import DatasetEval
+from mindspeed_llm.tasks.evaluation.eval_api.chat import Chat
+from mindspeed_llm.tasks.utils.error_utils import check_divisible_by_zero
 
 logger = logging.getLogger(__name__)
diff --git a/mindspeed_llm/tasks/evaluation/eval_impl/human_eval.py b/mindspeed_llm/tasks/evaluation/eval_impl/human_eval.py
index 2f72ee6cc..8f2648dce 100644
--- a/mindspeed_llm/tasks/evaluation/eval_impl/human_eval.py
+++ b/mindspeed_llm/tasks/evaluation/eval_impl/human_eval.py
@@ -25,10 +25,10 @@ import tqdm
 from torch import distributed as dist
 
 from .template import CODE_TEST_LOG_DIR
-from ..eval_api.dataset_eval import DatasetEval
-from ..eval_api.chat import Chat
-from ...error_utils import check_divisible_by_zero
-from ....training.utils import WRITE_FILE_DEFAULT_FLAGS, WRITE_FILE_DEFAULT_MODES
+from mindspeed_llm.tasks.evaluation.eval_api.dataset_eval import DatasetEval
+from mindspeed_llm.tasks.evaluation.eval_api.chat import Chat
+from mindspeed_llm.tasks.utils.error_utils import check_divisible_by_zero
+from mindspeed_llm.training.utils import WRITE_FILE_DEFAULT_FLAGS, WRITE_FILE_DEFAULT_MODES
 
 logger = logging.getLogger(__name__)
diff --git a/mindspeed_llm/tasks/evaluation/eval_impl/mmlu_eval.py b/mindspeed_llm/tasks/evaluation/eval_impl/mmlu_eval.py
index 0d1fac161..1413ee09b 100644
--- a/mindspeed_llm/tasks/evaluation/eval_impl/mmlu_eval.py
+++ b/mindspeed_llm/tasks/evaluation/eval_impl/mmlu_eval.py
@@ -22,9 +22,9 @@ import tqdm
 from torch import distributed as dist
 
 from .template import MMLU_TEMPLATE_DIR, get_eval_template
-from ..eval_api.dataset_eval import DatasetEval
-from ..eval_api.chat import Chat
-from ...error_utils import check_divisible_by_zero
+from mindspeed_llm.tasks.evaluation.eval_api.dataset_eval import DatasetEval
+from mindspeed_llm.tasks.evaluation.eval_api.chat import Chat
+from mindspeed_llm.tasks.utils.error_utils import check_divisible_by_zero
 
 logger = logging.getLogger(__name__)
diff --git a/mindspeed_llm/tasks/megatron_adaptor.py b/mindspeed_llm/tasks/megatron_adaptor.py
index 1d8db9aab..888c463ec 100644
--- a/mindspeed_llm/tasks/megatron_adaptor.py
+++ b/mindspeed_llm/tasks/megatron_adaptor.py
@@ -342,7 +342,7 @@ class CoreAdaptation(MegatronAdaptationABC):
                                      _batched_p2p_ops)
 
         # dpo relative, we need to change the recv/send shape when using PP, then deal with it by ourselves.
-        from mindspeed_llm.tasks.post_train.utils import get_tensor_shapes_decorator
+        from mindspeed_llm.tasks.posttrain.utils import get_tensor_shapes_decorator
         MegatronAdaptation.register('megatron.core.pipeline_parallel.schedules.get_tensor_shapes',
                                     get_tensor_shapes_decorator)
diff --git a/mindspeed_llm/tasks/models/mask_generator.py b/mindspeed_llm/tasks/models/mask_generator.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/mindspeed_llm/tasks/models/transformer/fast_mlp.py b/mindspeed_llm/tasks/models/transformer/fast_mlp.py
index 41b41f677..f8bbaf914 100644
--- a/mindspeed_llm/tasks/models/transformer/fast_mlp.py
+++ b/mindspeed_llm/tasks/models/transformer/fast_mlp.py
@@ -4,7 +4,7 @@ try:
     import fused_weight_gradient_mlp_cuda
 except ImportError:
     fused_weight_gradient_mlp_cuda = None
-from mindspeed_llm.tasks.finetune.lora.cc_lora_forward import get_tensor_model_parallel_group, \
+from mindspeed_llm.tasks.posttrain.lora.cc_lora_forward import get_tensor_model_parallel_group, \
     _gather_along_first_dim_async, _reduce_scatter_along_first_dim_async, get_tensor_model_parallel_world_size
diff --git a/mindspeed_llm/tasks/post_train/__init__.py b/mindspeed_llm/tasks/post_train/__init__.py
deleted file mode 100644
index aaf493892..000000000
--- a/mindspeed_llm/tasks/post_train/__init__.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# coding=utf-8
-# Copyright (c) 2024, HUAWEI CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/mindspeed_llm/tasks/data/__init__.py b/mindspeed_llm/tasks/posttrain/__init__.py
similarity index 100%
rename from mindspeed_llm/tasks/data/__init__.py
rename to mindspeed_llm/tasks/posttrain/__init__.py
diff --git a/mindspeed_llm/tasks/post_train/base/__init__.py b/mindspeed_llm/tasks/posttrain/base/__init__.py
similarity index 100%
rename from mindspeed_llm/tasks/post_train/base/__init__.py
rename to mindspeed_llm/tasks/posttrain/base/__init__.py
diff --git a/mindspeed_llm/tasks/post_train/base/base_trainer.py b/mindspeed_llm/tasks/posttrain/base/base_trainer.py
similarity index 99%
rename from mindspeed_llm/tasks/post_train/base/base_trainer.py
rename to mindspeed_llm/tasks/posttrain/base/base_trainer.py
index ed73961da..f5db9015b 100644
--- a/mindspeed_llm/tasks/post_train/base/base_trainer.py
+++ b/mindspeed_llm/tasks/posttrain/base/base_trainer.py
@@ -23,7 +23,7 @@ from megatron.training.checkpointing import save_checkpoint
 from mindspeed_llm.training import build_train_args
 from mindspeed_llm.training import train
 from mindspeed_llm.training.initialize import set_jit_fusion_options
-from mindspeed_llm.tasks.post_train.utils import train_valid_test_datasets_provider
+from mindspeed_llm.tasks.posttrain.utils import train_valid_test_datasets_provider
 
 _TRAIN_START_TIME = time.time()
diff --git a/mindspeed_llm/tasks/post_train/dpo/__init__.py b/mindspeed_llm/tasks/posttrain/dpo/__init__.py
similarity index 100%
rename from mindspeed_llm/tasks/post_train/dpo/__init__.py
rename to mindspeed_llm/tasks/posttrain/dpo/__init__.py
diff --git a/mindspeed_llm/tasks/post_train/dpo/dpo_model.py b/mindspeed_llm/tasks/posttrain/dpo/dpo_model.py
similarity index 98%
rename from mindspeed_llm/tasks/post_train/dpo/dpo_model.py
rename to mindspeed_llm/tasks/posttrain/dpo/dpo_model.py
index 830c6846b..27708ef14 100644
--- a/mindspeed_llm/tasks/post_train/dpo/dpo_model.py
+++ b/mindspeed_llm/tasks/posttrain/dpo/dpo_model.py
@@ -4,7 +4,7 @@ import torch
 from megatron.training import get_args
 from megatron.core import mpu
 from megatron.core.pipeline_parallel.schedules import get_attr_wrapped_model
-from mindspeed_llm.tasks.post_train.utils import get_attr_from_wrapped_model
+from mindspeed_llm.tasks.posttrain.utils import get_attr_from_wrapped_model
 
 
 class HyperModelABC(abc.ABC):
diff --git a/mindspeed_llm/tasks/post_train/dpo/dpo_trainer.py b/mindspeed_llm/tasks/posttrain/dpo/dpo_trainer.py
similarity index 99%
rename from mindspeed_llm/tasks/post_train/dpo/dpo_trainer.py
rename to mindspeed_llm/tasks/posttrain/dpo/dpo_trainer.py
index dfaf094d8..c34950337 100644
--- a/mindspeed_llm/tasks/post_train/dpo/dpo_trainer.py
+++ b/mindspeed_llm/tasks/posttrain/dpo/dpo_trainer.py
@@ -10,8 +10,8 @@ from megatron.core.enums import ModelType
 from megatron.training.checkpointing import load_checkpoint
 from megatron.training.utils import average_losses_across_data_parallel_group
 from megatron.training.global_vars import set_args
-from mindspeed_llm.tasks.post_train.base import BaseTrainer
-from mindspeed_llm.tasks.post_train.dpo.dpo_model import DPOModel
+from mindspeed_llm.tasks.posttrain.base import BaseTrainer
+from mindspeed_llm.tasks.posttrain.dpo.dpo_model import DPOModel
 from mindspeed_llm.training.utils import get_tune_attention_mask, get_finetune_data_on_this_tp_rank
 from mindspeed_llm.training.utils import get_batch_on_this_cp_rank, generate_actual_seq_len
diff --git a/mindspeed_llm/tasks/post_train/dpo/simpo_trainer.py b/mindspeed_llm/tasks/posttrain/dpo/simpo_trainer.py
similarity index 98%
rename from mindspeed_llm/tasks/post_train/dpo/simpo_trainer.py
rename to mindspeed_llm/tasks/posttrain/dpo/simpo_trainer.py
index 2068c13cb..6d7ba5838 100644
--- a/mindspeed_llm/tasks/post_train/dpo/simpo_trainer.py
+++ b/mindspeed_llm/tasks/posttrain/dpo/simpo_trainer.py
@@ -7,8 +7,8 @@ import torch.nn.functional as F
 from megatron.training import get_args
 from megatron.core import mpu
 from megatron.training.utils import average_losses_across_data_parallel_group
-from mindspeed_llm.tasks.post_train.base import BaseTrainer
-from mindspeed_llm.tasks.post_train.dpo import DPOTrainer
+from mindspeed_llm.tasks.posttrain.base import BaseTrainer
+from mindspeed_llm.tasks.posttrain.dpo import DPOTrainer
 
 
 class SimPOTrainer(BaseTrainer):
diff --git a/mindspeed_llm/tasks/post_train/launcher.py b/mindspeed_llm/tasks/posttrain/launcher.py
similarity index 85%
rename from mindspeed_llm/tasks/post_train/launcher.py
rename to mindspeed_llm/tasks/posttrain/launcher.py
index 050ce86b9..7ee8d8ce7 100644
--- a/mindspeed_llm/tasks/post_train/launcher.py
+++ b/mindspeed_llm/tasks/posttrain/launcher.py
@@ -1,10 +1,10 @@
 # Copyright (c) 2024, HUAWEI CORPORATION. All rights reserved.
 from megatron.training import get_args
 from megatron.training.initialize import initialize_megatron
-from mindspeed_llm.tasks.post_train.sft import SFTTrainer
-from mindspeed_llm.tasks.post_train.dpo import DPOTrainer
-from mindspeed_llm.tasks.post_train.rm import RMTrainer
-from mindspeed_llm.tasks.post_train.dpo import SimPOTrainer
+from mindspeed_llm.tasks.posttrain.sft import SFTTrainer
+from mindspeed_llm.tasks.posttrain.dpo import DPOTrainer
+from mindspeed_llm.tasks.posttrain.rm import RMTrainer
+from mindspeed_llm.tasks.posttrain.dpo import SimPOTrainer
 
 
 def get_trainer(stage):
diff --git a/mindspeed_llm/tasks/finetune/__init__.py b/mindspeed_llm/tasks/posttrain/lora/__init__.py
similarity index 100%
rename from mindspeed_llm/tasks/finetune/__init__.py
rename to mindspeed_llm/tasks/posttrain/lora/__init__.py
diff --git a/mindspeed_llm/tasks/finetune/lora/cc_lora_forward.py b/mindspeed_llm/tasks/posttrain/lora/cc_lora_forward.py
similarity index 100%
rename from mindspeed_llm/tasks/finetune/lora/cc_lora_forward.py
rename to mindspeed_llm/tasks/posttrain/lora/cc_lora_forward.py
diff --git a/mindspeed_llm/tasks/finetune/lora/lora_moe.py b/mindspeed_llm/tasks/posttrain/lora/lora_moe.py
similarity index 100%
rename from mindspeed_llm/tasks/finetune/lora/lora_moe.py
rename to mindspeed_llm/tasks/posttrain/lora/lora_moe.py
diff --git a/mindspeed_llm/tasks/finetune/lora/utils.py b/mindspeed_llm/tasks/posttrain/lora/utils.py
similarity index 100%
rename from mindspeed_llm/tasks/finetune/lora/utils.py
rename to mindspeed_llm/tasks/posttrain/lora/utils.py
diff --git a/mindspeed_llm/tasks/post_train/rm/__init__.py b/mindspeed_llm/tasks/posttrain/rm/__init__.py
similarity index 100%
rename from mindspeed_llm/tasks/post_train/rm/__init__.py
rename to mindspeed_llm/tasks/posttrain/rm/__init__.py
diff --git a/mindspeed_llm/tasks/post_train/rm/rm_model.py b/mindspeed_llm/tasks/posttrain/rm/rm_model.py
similarity index 100%
rename from mindspeed_llm/tasks/post_train/rm/rm_model.py
rename to mindspeed_llm/tasks/posttrain/rm/rm_model.py
diff --git a/mindspeed_llm/tasks/post_train/rm/rm_trainer.py b/mindspeed_llm/tasks/posttrain/rm/rm_trainer.py
similarity index 98%
rename from mindspeed_llm/tasks/post_train/rm/rm_trainer.py
rename to mindspeed_llm/tasks/posttrain/rm/rm_trainer.py
index 7221f289f..3b54e062c 100644
--- a/mindspeed_llm/tasks/post_train/rm/rm_trainer.py
+++ b/mindspeed_llm/tasks/posttrain/rm/rm_trainer.py
@@ -13,10 +13,9 @@ from megatron.training.yaml_arguments import core_transformer_config_from_yaml
 from megatron.core.transformer.spec_utils import import_module
 from megatron.training.utils import average_losses_across_data_parallel_group
 from megatron.core.models.gpt import GPTModel
-from mindspeed_llm.tasks.post_train.base import BaseTrainer
+from mindspeed_llm.tasks.posttrain.base import BaseTrainer
+from mindspeed_llm.tasks.posttrain.rm.rm_model import GPTRewardModel
 from mindspeed_llm.training.utils import get_tune_attention_mask, get_finetune_data_on_this_tp_rank
-from mindspeed_llm.tasks.post_train.rm.rm_model import GPTRewardModel
-
 
 class RMTrainer(BaseTrainer):
     """
diff --git a/mindspeed_llm/tasks/post_train/sft/__init__.py b/mindspeed_llm/tasks/posttrain/sft/__init__.py
similarity index 100%
rename from mindspeed_llm/tasks/post_train/sft/__init__.py
rename to mindspeed_llm/tasks/posttrain/sft/__init__.py
diff --git a/mindspeed_llm/tasks/post_train/sft/sft_trainer.py b/mindspeed_llm/tasks/posttrain/sft/sft_trainer.py
similarity index 98%
rename from mindspeed_llm/tasks/post_train/sft/sft_trainer.py
rename to mindspeed_llm/tasks/posttrain/sft/sft_trainer.py
index 0afca30c9..fd034ad65 100644
--- a/mindspeed_llm/tasks/post_train/sft/sft_trainer.py
+++ b/mindspeed_llm/tasks/posttrain/sft/sft_trainer.py
@@ -11,7 +11,7 @@ from megatron.training.utils import (
 )
 from megatron.training import get_timers
 from mindspeed_llm.training.utils import get_tune_attention_mask, get_finetune_data_on_this_tp_rank, generate_actual_seq_len
-from mindspeed_llm.tasks.post_train.base import BaseTrainer
+from mindspeed_llm.tasks.posttrain.base import BaseTrainer
 
 
 class SFTTrainer(BaseTrainer):
diff --git a/mindspeed_llm/tasks/post_train/utils.py b/mindspeed_llm/tasks/posttrain/utils.py
similarity index 100%
rename from mindspeed_llm/tasks/post_train/utils.py
rename to mindspeed_llm/tasks/posttrain/utils.py
diff --git a/mindspeed_llm/tasks/data/collator.py b/mindspeed_llm/tasks/preprocess/collator.py
similarity index 100%
rename from mindspeed_llm/tasks/data/collator.py
rename to mindspeed_llm/tasks/preprocess/collator.py
diff --git a/mindspeed_llm/tasks/preprocess/decoder_packed_mtf_dataset.py b/mindspeed_llm/tasks/preprocess/decoder_packed_mtf_dataset.py
index c4339410a..28286cefc 100644
--- a/mindspeed_llm/tasks/preprocess/decoder_packed_mtf_dataset.py
+++ b/mindspeed_llm/tasks/preprocess/decoder_packed_mtf_dataset.py
@@ -26,7 +26,7 @@ from megatron.training import print_rank_0, get_args
 from megatron.core import parallel_state
 from megatron.legacy.data.dataset_utils import get_train_valid_test_split_
 from mindspeed_llm.training.tokenizer import build_tokenizer
-from mindspeed_llm.tasks.error_utils import check_equal
+from mindspeed_llm.tasks.utils.error_utils import check_equal
 from mindspeed_llm.tasks.preprocess.mtf_dataset import MTFDataset, get_packed_indexed_dataset
 
 logger = logging.getLogger(__name__)
diff --git a/mindspeed_llm/tasks/preprocess/mtf_dataset.py b/mindspeed_llm/tasks/preprocess/mtf_dataset.py
index 73df15e31..934cf3373 100644
--- a/mindspeed_llm/tasks/preprocess/mtf_dataset.py
+++ b/mindspeed_llm/tasks/preprocess/mtf_dataset.py
@@ -22,7 +22,7 @@ import numpy as np
 import torch
 
 from megatron.core.datasets.indexed_dataset import IndexedDataset
-from mindspeed_llm.tasks.error_utils import ensure_valid
+from mindspeed_llm.tasks.utils.error_utils import ensure_valid
 
 
 class MTFDataset(torch.utils.data.Dataset):
diff --git a/mindspeed_llm/tasks/trainer/launcher.py b/mindspeed_llm/tasks/trainer/launcher.py
deleted file mode 100644
index 5500480b3..000000000
--- a/mindspeed_llm/tasks/trainer/launcher.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# Copyright (c) 2024, HUAWEI CORPORATION. All rights reserved.
-from megatron.training import get_args
-from megatron.training.initialize import initialize_megatron
-from mindspeed_llm.tasks.trainer.base import BaseTrainer
-from mindspeed_llm.tasks.rl.dpo import DPOTrainer
-from mindspeed_llm.tasks.rl.rm import RMTrainer
-from mindspeed_llm.tasks.rl.simpo import SimPOTrainer
-
-
-class AutoTrainer:
-    """
-    A trainer is used to automatically determine which scenario (common pt, sft, rm, dpo) to use.
-    """
-
-    def __init__(self):
-        initialize_megatron()
-
-        self.args = get_args()
-
-        if self.args.stage == "dpo":
-            self.trainer = DPOTrainer()
-        elif self.args.stage == "rm":
-            self.trainer = RMTrainer()
-        elif self.args.stage == "simpo":
-            self.trainer = SimPOTrainer()
-        else:
-            self.trainer = BaseTrainer()
-
-    def train(self):
-        self.trainer.train()
diff --git a/mindspeed_llm/tasks/finetune/lora/__init__.py b/mindspeed_llm/tasks/utils/__init__.py
similarity index 100%
rename from mindspeed_llm/tasks/finetune/lora/__init__.py
rename to mindspeed_llm/tasks/utils/__init__.py
diff --git a/mindspeed_llm/tasks/error_utils.py b/mindspeed_llm/tasks/utils/error_utils.py
similarity index 100%
rename from mindspeed_llm/tasks/error_utils.py
rename to mindspeed_llm/tasks/utils/error_utils.py
diff --git a/mindspeed_llm/training/checkpointing.py b/mindspeed_llm/training/checkpointing.py
index 016f6e162..21ae6165d 100644
--- a/mindspeed_llm/training/checkpointing.py
+++ b/mindspeed_llm/training/checkpointing.py
@@ -18,8 +18,8 @@ from functools import wraps
 from megatron.training import get_args
 from megatron.training.utils import print_rank_0
 from megatron.training.checkpointing import _load_base_checkpoint
-from mindspeed_llm.tasks.finetune.lora.utils import is_enable_lora, merge_dicts, modify_keys_with_dict
-from mindspeed_llm.tasks.post_train.utils import load_checkpoint_loosely
+from mindspeed_llm.tasks.posttrain.lora.utils import is_enable_lora, merge_dicts, modify_keys_with_dict
+from mindspeed_llm.tasks.posttrain.utils import load_checkpoint_loosely
 
 
 def _load_base_checkpoint_wrapper(fn):
diff --git a/mindspeed_llm/training/initialize.py b/mindspeed_llm/training/initialize.py
index 51100ccb4..79c01af45 100644
--- a/mindspeed_llm/training/initialize.py
+++ b/mindspeed_llm/training/initialize.py
@@ -31,7 +31,7 @@ from megatron.training.initialize import (
 
 from mindspeed_llm.training.arguments import parse_args_decorator
 from mindspeed_llm.core.tensor_parallel.ascend_turbo.initialize import initialize_cfg_from_args
-from mindspeed_llm.tasks.error_utils import ensure_valid
+from mindspeed_llm.tasks.utils.error_utils import ensure_valid
 from mindspeed_llm.training.utils import seed_all
diff --git a/mindspeed_llm/training/training.py b/mindspeed_llm/training/training.py
index 7ecfdb21f..6fc2ba441 100644
--- a/mindspeed_llm/training/training.py
+++ b/mindspeed_llm/training/training.py
@@ -55,7 +55,7 @@ from megatron.training.utils import (
 from megatron.core.distributed import DistributedDataParallel as DDP
 from megatron.core.distributed import finalize_model_grads
 from mindspeed_llm.training.initialize import set_jit_fusion_options
-from mindspeed_llm.tasks.finetune.lora.utils import is_enable_lora
+from mindspeed_llm.tasks.posttrain.lora.utils import is_enable_lora
 
 
 def model_provider_func_wrapper(model_provider_func):
@@ -79,12 +79,12 @@ def model_provider_func_wrapper(model_provider_func):
         setattr(peft.tuners.lora.LoraLayer, 'unmerge', peft.tuners.lora.Linear.unmerge)
         setattr(peft.tuners.lora.LoraLayer, 'get_delta_weight', peft.tuners.lora.Linear.get_delta_weight)
         from peft.tuners.lora import tp_layer
-        from mindspeed_llm.tasks.finetune.lora.lora_moe import LoraParallelLinearMoE
+        from mindspeed_llm.tasks.posttrain.lora.lora_moe import LoraParallelLinearMoE
         tp_layer.LoraParallelLinear = LoraParallelLinearMoE
 
         if hasattr(args, 'lora_fusion') and args.lora_fusion:
             from peft.tuners.lora.tp_layer import LoraParallelLinear
-            from mindspeed_llm.tasks.finetune.lora.cc_lora_forward import CCLoraParallelLinearForward
+            from mindspeed_llm.tasks.posttrain.lora.cc_lora_forward import CCLoraParallelLinearForward
             LoraParallelLinear.forward = CCLoraParallelLinearForward
 
         config = core_transformer_config_from_args(args)
diff --git a/posttrain_gpt.py b/posttrain_gpt.py
index 7264d9f0a..f7275f15f 100644
--- a/posttrain_gpt.py
+++ b/posttrain_gpt.py
@@ -1,6 +1,6 @@
 # Copyright (c) 2024, HUAWEI CORPORATION. All rights reserved.
 from mindspeed_llm import megatron_adaptor
-from mindspeed_llm.tasks.post_train.launcher import AutoTrainer
+from mindspeed_llm.tasks.posttrain.launcher import AutoTrainer
 
 
 def launch():
-- 
Gitee
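
For reference, a minimal usage sketch of the entry point after this refactor. It is not part of the patch: the imports mirror the posttrain_gpt.py diff above, and the stage dispatch inside AutoTrainer ("dpo", "rm", "simpo", base trainer otherwise) is assumed to carry over from the deleted trainer/launcher.py.

# Illustrative sketch, not part of the patch: launching post-training
# through the relocated mindspeed_llm.tasks.posttrain package.
from mindspeed_llm import megatron_adaptor  # noqa: F401, applies Megatron adaptations on import
from mindspeed_llm.tasks.posttrain.launcher import AutoTrainer


def launch():
    # AutoTrainer initializes Megatron, reads args.stage, picks the matching
    # trainer (DPO/RM/SimPO or the base trainer), and runs its train loop.
    trainer = AutoTrainer()
    trainer.train()


if __name__ == '__main__':
    launch()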