diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/file_utils.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/file_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..e518cb109d600e986324849100af207993889002
--- /dev/null
+++ b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/file_utils.py
@@ -0,0 +1,86 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import os
+from pathlib import Path
+from einops import rearrange
+
+import torch
+import torchvision
+import numpy as np
+import imageio
+
+CODE_SUFFIXES = {
+    ".py",  # Python code
+    ".sh",  # Shell scripts
+    ".yaml",
+    ".yml",  # Configuration files
+}
+
+
+def safe_dir(path):
+    """
+    Create a directory if it does not exist.
+
+    Args:
+        path (str or Path): Path to the directory.
+
+    Returns:
+        path (Path): Path object of the directory.
+    """
+    path = Path(path)
+    path.mkdir(exist_ok=True, parents=True)
+    return path
+
+
+def safe_file(path):
+    """
+    Create the parent directory of a file if it does not exist.
+
+    Args:
+        path (str or Path): Path to the file.
+
+    Returns:
+        path (Path): Path object of the file.
+    """
+    path = Path(path)
+    path.parent.mkdir(exist_ok=True, parents=True)
+    return path
+
+
+def save_videos_grid(videos: torch.Tensor, path: str, rescale=False, n_rows=1, fps=24):
+    """Save a batch of videos from a video tensor as a single grid video.
+    Adapted from https://github.com/guoyww/AnimateDiff/blob/e92bd5671ba62c0d774a32951453e328018b7c5b/animatediff/utils/util.py#L61
+
+    Args:
+        videos (torch.Tensor): video tensor predicted by the model
+        path (str): path to save the video
+        rescale (bool, optional): rescale the video tensor from [-1, 1] to [0, 1]. Defaults to False.
+        n_rows (int, optional): number of rows in the grid. Defaults to 1.
+        fps (int, optional): frame rate of the saved video. Defaults to 24.
+    """
+    videos = rearrange(videos, "b c t h w -> t b c h w")
+    outputs = []
+    for x in videos:
+        x = torchvision.utils.make_grid(x, nrow=n_rows)
+        x = x.transpose(0, 1).transpose(1, 2).squeeze(-1)
+        if rescale:
+            x = (x + 1.0) / 2.0  # -1,1 -> 0,1
+        x = torch.clamp(x, 0, 1)
+        x = (x * 255).numpy().astype(np.uint8)
+        outputs.append(x)
+
+    os.makedirs(os.path.dirname(path), exist_ok=True)
+    imageio.mimsave(path, outputs, fps=fps)
\ No newline at end of file
diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/__init__.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/data_utils.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/data_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..a34eff96524d014cc54ce454ce9205e8d74f3072
--- /dev/null
+++ b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/data_utils.py
@@ -0,0 +1,29 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import math
+
+
+def align_to(value, alignment):
+    """Align height or width up to the given alignment factor.
+
+    Args:
+        value (int): height or width
+        alignment (int): target alignment factor
+
+    Returns:
+        int: the aligned value
+    """
+    return int(math.ceil(value / alignment) * alignment)
\ No newline at end of file
diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/helpers.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/helpers.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe59ad351c54956b761cf333e3899d404d43a9d8
--- /dev/null
+++ b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/helpers.py
@@ -0,0 +1,55 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import collections.abc
+from itertools import repeat
+
+
+def _ntuple(n):
+    def parse(x):
+        if isinstance(x, collections.abc.Iterable) and not isinstance(x, str):
+            x = tuple(x)
+            if len(x) == 1:
+                x = tuple(repeat(x[0], n))
+            return x
+        return tuple(repeat(x, n))
+    return parse
+
+
+to_1tuple = _ntuple(1)
+to_2tuple = _ntuple(2)
+to_3tuple = _ntuple(3)
+to_4tuple = _ntuple(4)
+
+
+def as_tuple(x):
+    if isinstance(x, collections.abc.Iterable) and not isinstance(x, str):
+        return tuple(x)
+    if x is None or isinstance(x, (int, float, str)):
+        return (x,)
+    else:
+        raise ValueError(f"Unknown type {type(x)}")
+
+
+def as_list_of_2tuple(x):
+    x = as_tuple(x)
+    if len(x) == 1:
+        x = (x[0], x[0])
+    if len(x) % 2 != 0:
+        raise ValueError(f"Expect even length, got {len(x)}.")
+    lst = []
+    for i in range(0, len(x), 2):
+        lst.append((x[i], x[i + 1]))
+    return lst
\ No newline at end of file
diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/parallel_mgr.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/parallel_mgr.py
new file mode 100644
index 0000000000000000000000000000000000000000..abce0a38f42733e182908ea19a3dcbdab8513f8a
--- /dev/null
+++ b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/parallel_mgr.py
@@ -0,0 +1,154 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import logging
+from typing import List, Optional, Union
+
+import torch
+import torch.distributed as dist
+
+from yunchang import set_seq_parallel_pg
+
+logger = logging.getLogger(__name__)
+
+
+def init_distributed_environment(
+    world_size: int = -1,
+    rank: int = -1,
+    distributed_init_method: str = "env://",
+    local_rank: int = -1,
+    backend: str = "hccl"
+):
+    logger.debug(
+        "world_size=%d rank=%d local_rank=%d distributed_init_method=%s backend=%s",
+        world_size,
+        rank,
+        local_rank,
+        distributed_init_method,
+        backend,
+    )
+    if not torch.distributed.is_initialized():
+        if distributed_init_method is None:
+            raise ValueError(
+                "distributed_init_method must be provided when initializing "
+                "distributed environment"
+            )
+        # this backend is used for WORLD
+        torch.distributed.init_process_group(
+            backend=backend,
+            init_method=distributed_init_method,
+            world_size=world_size,
+            rank=rank,
+        )
+        torch.npu.set_device(rank)
+
+
+def initialize_model_parallel(
+    data_parallel_degree: int = 1,
+    classifier_free_guidance_degree: int = 1,
+    sequence_parallel_degree: int = 1,
+    ulysses_degree: int = 1,
+    ring_degree: int = 1,
+    tensor_parallel_degree: int = 1,
+    pipeline_parallel_degree: int = 1,
+    vae_parallel_size: int = 0,
+    backend: Optional[str] = None,
+) -> None:
+
+    if not torch.distributed.is_initialized():
+        raise ValueError("Distributed process group has not been initialized")
+
+    world_size: int = torch.distributed.get_world_size()
+
+    dit_parallel_size = (data_parallel_degree *
+                         classifier_free_guidance_degree *
+                         sequence_parallel_degree *
+                         pipeline_parallel_degree *
+                         tensor_parallel_degree)
+
+    if world_size < dit_parallel_size:
+        raise RuntimeError(
+            f"world_size ({world_size}) is less than "
+            f"tensor_parallel_degree ({tensor_parallel_degree}) x "
+            f"pipeline_parallel_degree ({pipeline_parallel_degree}) x "
+            f"sequence_parallel_degree ({sequence_parallel_degree}) x "
+            f"classifier_free_guidance_degree "
+            f"({classifier_free_guidance_degree}) x "
+            f"data_parallel_degree ({data_parallel_degree})"
+        )
+    if world_size == 8:
+        set_seq_parallel_pg(
+            sp_ulysses_degree=ulysses_degree,
+            sp_ring_degree=ring_degree,
+            rank=dist.get_rank(),
+            world_size=world_size
+        )
+    elif world_size == 16:
+        set_seq_parallel_pg(
+            sp_ulysses_degree=ulysses_degree,
+            sp_ring_degree=ring_degree,
+            rank=dist.get_rank(),
+            world_size=world_size,
+            use_ulysses_low=False
+        )
+
+
+def get_sequence_parallel_world_size():
+    return dist.get_world_size()
+
+
+def get_sequence_parallel_rank():
+    return dist.get_rank()
+
+
+def all_gather(
+    input_: torch.Tensor, dim: int = 0, separate_tensors: bool = False
+) -> Union[torch.Tensor, List[torch.Tensor]]:
+    world_size = get_sequence_parallel_world_size()
+    if world_size == 1:
+        return input_
+    if dim >= input_.dim() or dim < -input_.dim():
+        raise ValueError(f"Invalid dim ({dim}) for input tensor with shape {input_.size()}")
+    if dim < 0:
+        # Convert negative dim to positive.
+        dim += input_.dim()
+    # Allocate output tensor.
+    input_size = list(input_.size())
+    input_size[0] *= world_size
+    output_tensor = torch.empty(
+        input_size, dtype=input_.dtype, device=input_.device
+    )
+    # All-gather.
+    torch.distributed.all_gather_into_tensor(
+        output_tensor, input_
+    )
+    if dim != 0:
+        input_size[0] //= world_size
+        output_tensor = output_tensor.reshape([world_size] + input_size)
+        output_tensor = output_tensor.movedim(0, dim)
+    if separate_tensors:
+        tensor_list = [
+            output_tensor.view(-1)
+            .narrow(0, input_.numel() * i, input_.numel())
+            .view_as(input_)
+            for i in range(world_size)
+        ]
+        return tensor_list
+    else:
+        input_size = list(input_.size())
+        input_size[dim] = input_size[dim] * world_size
+        # Reshape
+        output_tensor = output_tensor.reshape(input_size)
+        return output_tensor
\ No newline at end of file
diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/preprocess_text_encoder_tokenizer_utils.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/preprocess_text_encoder_tokenizer_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..923dba211ac4d902d242903a089389bc0bbf6544
--- /dev/null
+++ b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/preprocess_text_encoder_tokenizer_utils.py
@@ -0,0 +1,64 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import argparse
+
+import torch
+import torch_npu
+from torch_npu.contrib import transfer_to_npu
+from transformers import (
+    AutoProcessor,
+    LlavaForConditionalGeneration,
+)
+
+
+def preprocess_text_encoder_tokenizer(args):
+
+    processor = AutoProcessor.from_pretrained(args.input_dir)
+    model = LlavaForConditionalGeneration.from_pretrained(
+        args.input_dir,
+        torch_dtype=torch.float16,
+        low_cpu_mem_usage=True,
+    ).to(0)
+
+    model.language_model.save_pretrained(
+        args.output_dir
+    )
+    processor.tokenizer.save_pretrained(
+        args.output_dir
+    )
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--input_dir",
+        type=str,
+        required=True,
+        help="The path to the llava-llama-3-8b-v1_1-transformers directory.",
+    )
+    parser.add_argument(
+        "--output_dir",
+        type=str,
+        default="",
+        help="The output path for the llava-llama-3-8b-text-encoder-tokenizer. "
+        "If '', the output is written to the parent directory of --input_dir.",
+    )
+    args = parser.parse_args()
+
+    if len(args.output_dir) == 0:
+        args.output_dir = "/".join(args.input_dir.split("/")[:-1])
+
+    preprocess_text_encoder_tokenizer(args)
\ No newline at end of file
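
Reviewer note: a minimal usage sketch of the file and data utilities added above. The tensor shape, alignment factor, and output path are illustrative assumptions, not part of this patch.

import torch
from hyvideo.utils.data_utils import align_to
from hyvideo.utils.file_utils import save_videos_grid

# Hypothetical inputs: align a height to a multiple of 16, then save a dummy
# video tensor shaped (batch, channels, frames, height, width) as one grid video.
height = align_to(713, 16)            # ceil(713 / 16) * 16 == 720
videos = torch.rand(1, 3, 8, 64, 64)  # values already in [0, 1], so rescale=False
save_videos_grid(videos, "./results/demo.mp4", rescale=False, n_rows=1, fps=24)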
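
Similarly, a quick sketch of the tuple helpers in helpers.py; the values are arbitrary examples:

from hyvideo.utils.helpers import to_2tuple, as_tuple, as_list_of_2tuple

to_2tuple(7)                     # (7, 7)
to_2tuple((3, 5))                # (3, 5), already length 2
as_tuple(None)                   # (None,)
as_list_of_2tuple((1, 2, 3, 4))  # [(1, 2), (3, 4)]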
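
A hedged sketch of driving parallel_mgr.py, assuming a torchrun launch on Ascend NPUs with HCCL; the shapes are dummy data, and note that initialize_model_parallel only wires up the yunchang sequence-parallel groups for world sizes 8 and 16:

import os
import torch
import torch_npu  # assumed available in the MindIE environment
from hyvideo.utils.parallel_mgr import (
    init_distributed_environment,
    initialize_model_parallel,
    all_gather,
)

# RANK and WORLD_SIZE are set by torchrun.
rank = int(os.environ["RANK"])
world_size = int(os.environ["WORLD_SIZE"])
init_distributed_environment(world_size=world_size, rank=rank)
initialize_model_parallel(sequence_parallel_degree=world_size,
                          ulysses_degree=world_size)

# Each rank contributes a (2, 4) slice; the gather stacks them along dim 0.
local = torch.full((2, 4), float(rank), device=f"npu:{rank}")
gathered = all_gather(local, dim=0)  # shape (2 * world_size, 4)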
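
Finally, preprocess_text_encoder_tokenizer_utils.py is a standalone CLI; a typical invocation (paths are placeholders) would be: python hyvideo/utils/preprocess_text_encoder_tokenizer_utils.py --input_dir /path/to/llava-llama-3-8b-v1_1-transformers --output_dir /path/to/llava-llama-3-8b-text-encoder-tokenizer. When --output_dir is omitted, the language model and tokenizer are saved to the parent directory of --input_dir.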