From 4a2e680667ecfa4fcf25b2c86641588b20e6e255 Mon Sep 17 00:00:00 2001
From: donghaoran
Date: Wed, 7 Aug 2024 17:42:31 +0800
Subject: [PATCH 1/3] fix adaptation issues of lora and recomputation

---
 modellink/training.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/modellink/training.py b/modellink/training.py
index bf2f72eb6..fe7c2e584 100644
--- a/modellink/training.py
+++ b/modellink/training.py
@@ -85,6 +85,20 @@ def model_provider_func_wrapper(model_provider_func):
             model = get_peft_model(model, lora_config)
             model.add_module('module', model.get_base_model())
 
+
+            def _hook(_module, _x_in, _x_out):
+                """ Extract the feature map of model"""
+                _x_out.requires_grad_(True)
+            def _create_hooks(_model, layer):
+                """ Make the hooks function"""
+                for name, module in _model.named_modules():
+                    if type(module) == megatron.core.tensor_parallel.layers.VocabParallelEmbedding:
+                        _name = name.split('.')[-1]
+                        if _name in layer:
+                            module.register_forward_hook(_hook)
+            if args.recompute_method == 'block' and args.recompute_granularity == 'full':
+                _create_hooks(model, args.lora_register_forward_hook)
+
             model.print_trainable_parameters()
             for module in model.modules():
                 # LoRA Linear Layer need all reduce
-- 
Gitee


From c5ad375750125f64169e1b3abec6260d775ac7cf Mon Sep 17 00:00:00 2001
From: DONGHAORAN
Date: Mon, 12 Aug 2024 08:51:06 +0000
Subject: [PATCH 2/3] update modellink/training.py.

Signed-off-by: DONGHAORAN
---
 modellink/training.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/modellink/training.py b/modellink/training.py
index fe7c2e584..040407d15 100644
--- a/modellink/training.py
+++ b/modellink/training.py
@@ -85,14 +85,13 @@ def model_provider_func_wrapper(model_provider_func):
             model = get_peft_model(model, lora_config)
             model.add_module('module', model.get_base_model())
 
-
             def _hook(_module, _x_in, _x_out):
                 """ Extract the feature map of model"""
                 _x_out.requires_grad_(True)
             def _create_hooks(_model, layer):
                 """ Make the hooks function"""
                 for name, module in _model.named_modules():
-                    if type(module) == megatron.core.tensor_parallel.layers.VocabParallelEmbedding:
+                    if isinstance(module, megatron.core.tensor_parallel.layers.VocabParallelEmbedding):
                         _name = name.split('.')[-1]
                         if _name in layer:
                             module.register_forward_hook(_hook)
-- 
Gitee


From 461fc61af464fd73a90f0563ccc8a93397f8b0eb Mon Sep 17 00:00:00 2001
From: DONGHAORAN
Date: Mon, 12 Aug 2024 09:21:31 +0000
Subject: [PATCH 3/3] update modellink/training.py.

Signed-off-by: DONGHAORAN
---
 modellink/training.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/modellink/training.py b/modellink/training.py
index 040407d15..ff0cd983b 100644
--- a/modellink/training.py
+++ b/modellink/training.py
@@ -85,9 +85,11 @@ def model_provider_func_wrapper(model_provider_func):
             model = get_peft_model(model, lora_config)
             model.add_module('module', model.get_base_model())
 
+
             def _hook(_module, _x_in, _x_out):
                 """ Extract the feature map of model"""
                 _x_out.requires_grad_(True)
+
             def _create_hooks(_model, layer):
                 """ Make the hooks function"""
                 for name, module in _model.named_modules():
-- 
Gitee