diff --git a/examples/diffusers/flux/README.md b/examples/diffusers/flux/README.md index d8cc92e18b5a3dc628bc7c86ea642a227095f907..c33281ac0d534ac7d6f835d9ef13fd19ea184912 100644 --- a/examples/diffusers/flux/README.md +++ b/examples/diffusers/flux/README.md @@ -179,35 +179,6 @@ --validation_prompt="a validation prompt based on your own dataset" \ ``` - 【因模型较大 如不需要`checkpointing_steps`,请设置其大于`max_train_steps`, 避免开启】 - - ```shell - --checkpointing_steps=50000 \ # 修改50000步为所需要步数 - ``` - - 【如需保存checkpointing请修改代码】 - - ```shell - vim examples/dreambooth/train_dreambooth_flux.py #(1669行附近) - vim examples/dreambooth/train_dreambooth_lora_flux.py #(1788行附近) - ``` - - - 在文件上方的import栏增加`DistributedType`在`from accelerate import Acceleratore`后 (30行附近) - - 在`if accelerator.is_main_process`后增加 `or accelerator.distributed_type == DistributedType.DEEPSPEED` - - ```python - from accelerate import Accelerator, DistributedType - if accelerator.is_main_process or accelerator.distributed_type == DistributedType.DEEPSPEED: - ``` - - 更改shell脚本: - - ```shell - export HCCL_CONNECT_TIMEOUT=1200 # 大幅调高HCCL_CONNECT_TIMEOUT (如5000) - export HCCL_EXEC_TIMEOUT=17000 - --checkpointing_steps=50000 \ # 修改50000步为所需要步数 - ``` - 2. 【配置 FLUX 微调脚本】 联网情况下,微调模型可通过以下步骤下载。无网络时,用户可访问huggingface官网自行下载[FLUX.1-dev模型](https://huggingface.co/black-forest-labs/FLUX.1-dev) `model_name`模型 @@ -236,9 +207,12 @@ vim src/diffusers/models/embeddings.py ``` - 2. 打开`train_dreambooth_flux.py`或`train_dreambooth_lora_flux.py`文件 + ```python + freqs_dtype = torch.float32 # 760行附近 + # freqs_dtype = torch.float32 if is_mps else torch.float64 # 原代码 + ``` - - 在62行附近添加代码 + 2. 
打开`train_dreambooth_flux.py`或`train_dreambooth_lora_flux.py`文件 ```shell cd examples/dreambooth/ # 从diffusers目录进入dreambooth目录 @@ -246,11 +220,16 @@ vim train_dreambooth_lora_flux.py # 进入Python文件 ``` + - 在import栏/`if is_wandb_available():`上方(62行附近添加代码) + ```python # 添加代码到train_dreambooth_flux.py 62行附近 - from patch_flux import TorchPatcher, config_gc + from patch_flux import TorchPatcher, config_gc, create_save_model_hook TorchPatcher.apply_patch() config_gc() + + if is_wandb_available(): # 原代码 + import wandb ``` - 在log_validation里修改`pipeline = pipeline.to(accelerator.device)`,`train_dreambooth_flux.py`在171行附近`train_dreambooth_lora_flux.py`在180行附近 @@ -258,17 +237,70 @@ ```python # 修改pipeline为: pipeline = pipeline.to(accelerator.device, dtype=torch_dtype) + # pipeline = pipeline.to(accelerator.device) # 原代码 ``` 3. 【Optional】Ubuntu系统需在1701行附近 添加 `accelerator.print("")` ```python - if global_step >= args.max_train_steps: + if global_step >= args.max_train_steps: # 原代码 break - accelerator.print("") + accelerator.print("") # 添加 + ``` + + 4. 
【Optional】模型checkpoint saving保存 + + 【因模型较大 如不需要`checkpointing_steps`,请设置其大于`max_train_steps`, 避免开启】 + + ```shell + --checkpointing_steps=50000 \ # 修改50000步为所需要步数 + ``` + + 【如需保存checkpointing请修改代码】 + + ```shell + vim examples/dreambooth/train_dreambooth_flux.py #(1669行附近) + vim examples/dreambooth/train_dreambooth_lora_flux.py #(1788行附近) + ``` + + - 在文件上方的import栏增加`DistributedType`在`from accelerate import Accelerator`后 (30行附近) + - 在`if accelerator.is_main_process`后增加 `or accelerator.distributed_type == DistributedType.DEEPSPEED` (1669/1788行附近) + + ```python + from accelerate import Accelerator, DistributedType + # from accelerate import Accelerator # 原代码 + + if accelerator.is_main_process or accelerator.distributed_type == DistributedType.DEEPSPEED: + # if accelerator.is_main_process: # 原代码 + ``` + + Lora任务需调用patch任务进行权重保存: + 在`train_dreambooth_lora_flux.py`文件中找到代码`accelerator.register_save_state_pre_hook(save_model_hook)`进行修改(1308行附近),复制粘贴以下代码: + + ```python + # 添加 + save_Model_Hook = create_save_model_hook( + accelerator=accelerator, + unwrap_model=unwrap_model, + transformer=transformer, + text_encoder_one=text_encoder_one, + args=args, + weight_dtype=weight_dtype + ) + accelerator.register_save_state_pre_hook(save_Model_Hook) # 修改 + # accelerator.register_save_state_pre_hook(save_model_hook) # 原代码 + accelerator.register_load_state_pre_hook(load_model_hook) # 原代码 不修改 + ``` + + 更改shell脚本: + + ```shell + export HCCL_CONNECT_TIMEOUT=1200 # 大幅调高HCCL_CONNECT_TIMEOUT (如5000) + export HCCL_EXEC_TIMEOUT=17000 + --checkpointing_steps=50000 \ # 修改50000步为所需要步数 + ``` -3. 【启动 FLUX 微调脚本】 +4. 
【启动 FLUX 微调脚本】 本任务主要提供flux_dreambooth与flux_dreambooth_lora微调脚本,支持多卡训练。 diff --git a/examples/diffusers/flux/patch_flux.py b/examples/diffusers/flux/patch_flux.py index b16371ac60fc55575e7f077715fb18f3e2d7cbac..7368ed98f1168f90fac92ac681c5a8f07c2b0929 100644 --- a/examples/diffusers/flux/patch_flux.py +++ b/examples/diffusers/flux/patch_flux.py @@ -1,8 +1,25 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. # Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import gc import torch +from diffusers import FluxPipeline +from peft.utils import get_peft_model_state_dict from torch.distributed._shard.sharded_tensor.api import ShardedTensor @@ -31,5 +48,59 @@ class TorchPatcher: def config_gc(): - # set gc threshold + # set gc threshold, best range from experiments gc.set_threshold(700, 50, 1000) + + +# Save Lora weights for checkpointing steps +def create_save_model_hook( + accelerator, unwrap_model, transformer, text_encoder_one, args, weight_dtype +): + def save_model_hook(models, weights, output_dir): + if accelerator.is_main_process: + transformer_lora_layers_to_save = None + text_encoder_one_lora_layers_to_save = None + + for model in models: + if isinstance(unwrap_model(model), type(unwrap_model(transformer))): + transformer_model = unwrap_model(model) + if args.upcast_before_saving: + transformer_model = transformer_model.to(torch.float32) + else: + transformer_model = transformer_model.to(weight_dtype) + transformer_lora_layers_to_save = get_peft_model_state_dict( + transformer_model + ) + + elif ( + isinstance( + unwrap_model(model), type(unwrap_model(text_encoder_one)) + ) + and args.train_text_encoder + ): + text_encoder_one_lora_layers_to_save = get_peft_model_state_dict( + model.to(torch.float32) + ) + + elif ( + isinstance( + unwrap_model(model), type(unwrap_model(text_encoder_one)) + ) + and not args.train_text_encoder + ): + text_encoder_one_lora_layers_to_save = None + + else: + raise ValueError(f"unexpected save model: {model.__class__}") + + # make sure to pop weight so that corresponding model is not saved again + if weights: + weights.pop() + + FluxPipeline.save_lora_weights( + output_dir, + transformer_lora_layers=transformer_lora_layers_to_save, + text_encoder_lora_layers=text_encoder_one_lora_layers_to_save, + ) + + return save_model_hook diff --git a/examples/diffusers/sd3/README.md b/examples/diffusers/sd3/README.md index 
26d800775559657dd7eba7d979c74c3c0c325f34..e32829e1e0179209a8a8c4a697fe9479546ebbfd 100644 --- a/examples/diffusers/sd3/README.md +++ b/examples/diffusers/sd3/README.md @@ -223,9 +223,9 @@ torch npu 与 CANN包参考链接:[安装包参考链接](https://support.huaw 如下: ```python - if global_step >= args.max_train_steps: + if global_step >= args.max_train_steps: # 原代码 break - accelerator.print("") + accelerator.print("") # 添加 ``` 4. 【如需保存checkpointing请修改代码】 @@ -236,14 +236,39 @@ torch npu 与 CANN包参考链接:[安装包参考链接](https://support.huaw vim examples/dreambooth/train_dreambooth_lora_sd3.py ``` + - 在文件上方的import栏增加`DistributedType`在`from accelerate import Accelerator`后 (30行附近),并增加patch引用`from patch_sd3 import create_save_model_hook` - 在`if accelerator.is_main_process`后增加 `or accelerator.distributed_type == DistributedType.DEEPSPEED`(dreambooth在1681行附近,lora在1833行附近) - - 在文件上方的import栏增加`DistributedType`在`from accelerate import Acceleratore`后 (30行附近) ```python from accelerate import Accelerator, DistributedType + # from accelerate import Accelerator # 原代码 + from patch_sd3 import create_save_model_hook # 添加此行patch引用代码 + from accelerate.logging import get_logger # 原代码 + if accelerator.is_main_process or accelerator.distributed_type == DistributedType.DEEPSPEED: + # if accelerator.is_main_process: # 原代码 1681/1833行附近 ``` + Lora任务需调用patch任务进行权重保存: + + 在`train_dreambooth_lora_sd3.py`文件中找到代码`accelerator.register_save_state_pre_hook(save_model_hook)`进行修改(1368行附近),修改如下: + + ```python + # 添加 + save_Model_Hook = create_save_model_hook( + accelerator=accelerator, + unwrap_model=unwrap_model, + transformer=transformer, + text_encoder_one=text_encoder_one, + text_encoder_two=text_encoder_two, + args=args, + weight_dtype=weight_dtype + ) + accelerator.register_save_state_pre_hook(save_Model_Hook) # 修改 + # accelerator.register_save_state_pre_hook(save_model_hook) # 原代码 + accelerator.register_load_state_pre_hook(load_model_hook) # 原代码 不修改 + ``` + 5. 
【修改文件】 ```shell @@ -257,6 +282,7 @@ torch npu 与 CANN包参考链接:[安装包参考链接](https://support.huaw ```python # 修改pipeline为: pipeline = pipeline.to(accelerator.device, dtype=torch_dtype) + # pipeline = pipeline.to(accelerator.device) # 原代码 ``` 6. 【启动 SD3 微调脚本】 diff --git a/examples/diffusers/sd3/patch_sd3.py b/examples/diffusers/sd3/patch_sd3.py new file mode 100644 index 0000000000000000000000000000000000000000..42ac7a01a6fca2689a491554916afe02e73a50be --- /dev/null +++ b/examples/diffusers/sd3/patch_sd3.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import torch +from diffusers import StableDiffusion3Pipeline +from peft.utils import get_peft_model_state_dict + + +# Save Lora weights for checkpointing steps +def create_save_model_hook( + accelerator, + unwrap_model, + transformer, + text_encoder_one, + text_encoder_two, + args, + weight_dtype, +): + def save_model_hook(models, weights, output_dir): + if accelerator.is_main_process: + transformer_lora_layers_to_save = None + text_encoder_one_lora_layers_to_save = None + text_encoder_two_lora_layers_to_save = None + + for model in models: + if isinstance(unwrap_model(model), type(unwrap_model(transformer))): + transformer_model = unwrap_model(model) + if args.upcast_before_saving: + transformer_model = transformer_model.to(torch.float32) + else: + transformer_model = transformer_model.to(weight_dtype) + transformer_lora_layers_to_save = get_peft_model_state_dict( + transformer_model + ) + + elif ( + isinstance( + unwrap_model(model), type(unwrap_model(text_encoder_one)) + ) + and args.train_text_encoder + ): + # both text encoders are of the same class + hidden_size = unwrap_model(model).config.hidden_size + if hidden_size == 768: + text_encoder_one_lora_layers_to_save = ( + get_peft_model_state_dict(model.to(torch.float32)) + ) + elif hidden_size == 1280: + text_encoder_two_lora_layers_to_save = ( + get_peft_model_state_dict(model.to(torch.float32)) + ) + + elif ( + isinstance( + unwrap_model(model), type(unwrap_model(text_encoder_one)) + ) + and not args.train_text_encoder + ): + text_encoder_one_lora_layers_to_save = None + text_encoder_two_lora_layers_to_save = None + + else: + raise ValueError(f"unexpected save model: {model.__class__}") + + # make sure to pop weight so that corresponding model is not saved again + if weights: + weights.pop() + + StableDiffusion3Pipeline.save_lora_weights( + output_dir, + transformer_lora_layers=transformer_lora_layers_to_save, + text_encoder_lora_layers=text_encoder_one_lora_layers_to_save, + 
text_encoder_2_lora_layers=text_encoder_two_lora_layers_to_save, + ) + + return save_model_hook