diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/file_utils.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/file_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..e518cb109d600e986324849100af207993889002
--- /dev/null
+++ b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/file_utils.py
@@ -0,0 +1,86 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import os
+from pathlib import Path
+from einops import rearrange
+
+import torch
+import torchvision
+import numpy as np
+import imageio
+
+CODE_SUFFIXES = {
+    ".py",  # Python code
+    ".sh",  # Shell scripts
+    ".yaml",
+    ".yml",  # Configuration files
+}
+
+
+def safe_dir(path):
+    """
+    Create a directory if it does not exist.
+
+    Args:
+        path (str or Path): Path to the directory.
+
+    Returns:
+        path (Path): Path object of the directory.
+    """
+    path = Path(path)
+    path.mkdir(exist_ok=True, parents=True)
+    return path
+
+
+def safe_file(path):
+    """
+    Create the parent directory of a file if it does not exist.
+
+    Args:
+        path (str or Path): Path to the file.
+
+    Returns:
+        path (Path): Path object of the file.
+    """
+    path = Path(path)
+    path.parent.mkdir(exist_ok=True, parents=True)
+    return path
+
+
+def save_videos_grid(videos: torch.Tensor, path: str, rescale=False, n_rows=1, fps=24):
+    """Save a batch of videos from a video tensor as a single grid video.
+    Adapted from https://github.com/guoyww/AnimateDiff/blob/e92bd5671ba62c0d774a32951453e328018b7c5b/animatediff/utils/util.py#L61
+
+    Args:
+        videos (torch.Tensor): video tensor predicted by the model
+        path (str): path to save the video
+        rescale (bool, optional): rescale the video tensor from [-1, 1] to [0, 1]. Defaults to False.
+        n_rows (int, optional): number of rows in the grid. Defaults to 1.
+        fps (int, optional): frame rate of the saved video. Defaults to 24.
+    """
+    videos = rearrange(videos, "b c t h w -> t b c h w")
+    outputs = []
+    for x in videos:
+        x = torchvision.utils.make_grid(x, nrow=n_rows)
+        x = x.transpose(0, 1).transpose(1, 2).squeeze(-1)
+        if rescale:
+            x = (x + 1.0) / 2.0  # -1,1 -> 0,1
+        x = torch.clamp(x, 0, 1)
+        x = (x * 255).numpy().astype(np.uint8)
+        outputs.append(x)
+
+    os.makedirs(os.path.dirname(path), exist_ok=True)
+    imageio.mimsave(path, outputs, fps=fps)
\ No newline at end of file
diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/__init__.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/data_utils.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/data_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..a34eff96524d014cc54ce454ce9205e8d74f3072
--- /dev/null
+++ b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/data_utils.py
@@ -0,0 +1,29 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import math
+
+
+def align_to(value, alignment):
+    """Align height or width up to the given alignment factor.
+
+    Args:
+        value (int): height or width
+        alignment (int): target alignment factor
+
+    Returns:
+        int: the aligned value
+    """
+    return int(math.ceil(value / alignment) * alignment)
\ No newline at end of file
diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/helpers.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/helpers.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe59ad351c54956b761cf333e3899d404d43a9d8
--- /dev/null
+++ b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/helpers.py
@@ -0,0 +1,55 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import collections.abc
+from itertools import repeat
+
+
+def _ntuple(n):
+    def parse(x):
+        if isinstance(x, collections.abc.Iterable) and not isinstance(x, str):
+            x = tuple(x)
+            if len(x) == 1:
+                x = tuple(repeat(x[0], n))
+            return x
+        return tuple(repeat(x, n))
+    return parse
+
+
+to_1tuple = _ntuple(1)
+to_2tuple = _ntuple(2)
+to_3tuple = _ntuple(3)
+to_4tuple = _ntuple(4)
+
+
+def as_tuple(x):
+    if isinstance(x, collections.abc.Iterable) and not isinstance(x, str):
+        return tuple(x)
+    if x is None or isinstance(x, (int, float, str)):
+        return (x,)
+    else:
+        raise ValueError(f"Unknown type {type(x)}")
+
+
+def as_list_of_2tuple(x):
+    x = as_tuple(x)
+    if len(x) == 1:
+        x = (x[0], x[0])
+    if len(x) % 2 != 0:
+        raise ValueError(f"Expect even length, got {len(x)}.")
+    lst = []
+    for i in range(0, len(x), 2):
+        lst.append((x[i], x[i + 1]))
+    return lst
\ No newline at end of file
diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/parallel_mgr.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/parallel_mgr.py
new file mode 100644
index 0000000000000000000000000000000000000000..abce0a38f42733e182908ea19a3dcbdab8513f8a
--- /dev/null
+++ b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/parallel_mgr.py
@@ -0,0 +1,154 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import logging
+from typing import List, Optional, Union
+
+import torch
+import torch.distributed as dist
+
+from yunchang import set_seq_parallel_pg
+
+logger = logging.getLogger(__name__)
+
+
+def init_distributed_environment(
+    world_size: int = -1,
+    rank: int = -1,
+    distributed_init_method: str = "env://",
+    local_rank: int = -1,
+    backend: str = "hccl"
+):
+    logger.debug(
+        "world_size=%d rank=%d local_rank=%d distributed_init_method=%s backend=%s",
+        world_size,
+        rank,
+        local_rank,
+        distributed_init_method,
+        backend,
+    )
+    if not torch.distributed.is_initialized():
+        if distributed_init_method is None:
+            raise ValueError(
+                "distributed_init_method must be provided when initializing "
+                "distributed environment"
+            )
+        # this backend is used for WORLD
+        torch.distributed.init_process_group(
+            backend=backend,
+            init_method=distributed_init_method,
+            world_size=world_size,
+            rank=rank,
+        )
+        torch.npu.set_device(rank)
+
+
+def initialize_model_parallel(
+    data_parallel_degree: int = 1,
+    classifier_free_guidance_degree: int = 1,
+    sequence_parallel_degree: int = 1,
+    ulysses_degree: int = 1,
+    ring_degree: int = 1,
+    tensor_parallel_degree: int = 1,
+    pipeline_parallel_degree: int = 1,
+    vae_parallel_size: int = 0,
+    backend: Optional[str] = None,
+) -> None:
+
+    if not torch.distributed.is_initialized():
+        raise ValueError("Distributed process group has not been initialized")
+
+    world_size: int = torch.distributed.get_world_size()
+
+    dit_parallel_size = (data_parallel_degree *
+                         classifier_free_guidance_degree *
+                         sequence_parallel_degree *
+                         pipeline_parallel_degree *
+                         tensor_parallel_degree)
+
+    if world_size < dit_parallel_size:
+        raise RuntimeError(
+            f"world_size ({world_size}) is less than "
+            f"tensor_parallel_degree ({tensor_parallel_degree}) x "
+            f"pipeline_parallel_degree ({pipeline_parallel_degree}) x "
+            f"sequence_parallel_degree ({sequence_parallel_degree}) x "
+            f"classifier_free_guidance_degree "
+            f"({classifier_free_guidance_degree}) x "
+            f"data_parallel_degree ({data_parallel_degree})"
+        )
+    if world_size == 8:
+        set_seq_parallel_pg(
+            sp_ulysses_degree=ulysses_degree,
+            sp_ring_degree=ring_degree,
+            rank=dist.get_rank(),
+            world_size=world_size
+        )
+    elif world_size == 16:
+        set_seq_parallel_pg(
+            sp_ulysses_degree=ulysses_degree,
+            sp_ring_degree=ring_degree,
+            rank=dist.get_rank(),
+            world_size=world_size,
+            use_ulysses_low=False
+        )
+
+
+def get_sequence_parallel_world_size():
+    return dist.get_world_size()
+
+
+def get_sequence_parallel_rank():
+    return dist.get_rank()
+
+
+def all_gather(
+    input_: torch.Tensor, dim: int = 0, separate_tensors: bool = False
+) -> Union[torch.Tensor, List[torch.Tensor]]:
+    world_size = get_sequence_parallel_world_size()
+    if world_size == 1:
+        return input_
+    if dim >= input_.dim() or dim < -input_.dim():
+        raise ValueError(f"Invalid dim ({dim}) for input tensor with shape {input_.size()}")
+    if dim < 0:
+        # Convert negative dim to positive.
+        dim += input_.dim()
+    # Allocate output tensor.
+    input_size = list(input_.size())
+    input_size[0] *= world_size
+    output_tensor = torch.empty(
+        input_size, dtype=input_.dtype, device=input_.device
+    )
+    # All-gather.
+    torch.distributed.all_gather_into_tensor(
+        output_tensor, input_
+    )
+    if dim != 0:
+        input_size[0] //= world_size
+        output_tensor = output_tensor.reshape([world_size] + input_size)
+        output_tensor = output_tensor.movedim(0, dim)
+    if separate_tensors:
+        tensor_list = [
+            output_tensor.view(-1)
+            .narrow(0, input_.numel() * i, input_.numel())
+            .view_as(input_)
+            for i in range(world_size)
+        ]
+        return tensor_list
+    else:
+        input_size = list(input_.size())
+        input_size[dim] = input_size[dim] * world_size
+        # Reshape
+        output_tensor = output_tensor.reshape(input_size)
+        return output_tensor
\ No newline at end of file
diff --git a/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/preprocess_text_encoder_tokenizer_utils.py b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/preprocess_text_encoder_tokenizer_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..923dba211ac4d902d242903a089389bc0bbf6544
--- /dev/null
+++ b/MindIE/MultiModal/HunyuanVideo/hyvideo/utils/preprocess_text_encoder_tokenizer_utils.py
@@ -0,0 +1,64 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import argparse
+
+import torch
+import torch_npu
+from torch_npu.contrib import transfer_to_npu
+from transformers import (
+    AutoProcessor,
+    LlavaForConditionalGeneration,
+)
+
+
+def preprocess_text_encoder_tokenizer(args):
+
+    processor = AutoProcessor.from_pretrained(args.input_dir)
+    model = LlavaForConditionalGeneration.from_pretrained(
+        args.input_dir,
+        torch_dtype=torch.float16,
+        low_cpu_mem_usage=True,
+    ).to(0)
+
+    model.language_model.save_pretrained(
+        args.output_dir
+    )
+    processor.tokenizer.save_pretrained(
+        args.output_dir
+    )
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--input_dir",
+        type=str,
+        required=True,
+        help="The path to the llava-llama-3-8b-v1_1-transformers directory.",
+    )
+    parser.add_argument(
+        "--output_dir",
+        type=str,
+        default="",
+        help="The output path for the llava-llama-3-8b-text-encoder-tokenizer. "
+        "If '', the output is written to the parent directory of --input_dir.",
+    )
+    args = parser.parse_args()
+
+    if len(args.output_dir) == 0:
+        args.output_dir = "/".join(args.input_dir.split("/")[:-1])
+
+    preprocess_text_encoder_tokenizer(args)
\ No newline at end of file
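
Reviewer note: a minimal usage sketch of the file and data utilities added above. The tensor shape, alignment factor, and output path are illustrative assumptions, not part of this patch.

import torch
from hyvideo.utils.data_utils import align_to
from hyvideo.utils.file_utils import save_videos_grid

# Hypothetical inputs: align a height to a multiple of 16, then save a dummy
# video tensor shaped (batch, channels, frames, height, width) as one grid video.
height = align_to(713, 16)            # ceil(713 / 16) * 16 == 720
videos = torch.rand(1, 3, 8, 64, 64)  # values already in [0, 1], so rescale=False
save_videos_grid(videos, "./results/demo.mp4", rescale=False, n_rows=1, fps=24)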
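
Similarly, a quick sketch of the tuple helpers in helpers.py; the values are arbitrary examples:

from hyvideo.utils.helpers import to_2tuple, as_tuple, as_list_of_2tuple

to_2tuple(7)                     # (7, 7)
to_2tuple((3, 5))                # (3, 5), already length 2
as_tuple(None)                   # (None,)
as_list_of_2tuple((1, 2, 3, 4))  # [(1, 2), (3, 4)]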
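
A hedged sketch of driving parallel_mgr.py, assuming a torchrun launch on Ascend NPUs with HCCL; the shapes are dummy data, and note that initialize_model_parallel only wires up the yunchang sequence-parallel groups for world sizes 8 and 16:

import os
import torch
import torch_npu  # assumed available in the MindIE environment
from hyvideo.utils.parallel_mgr import (
    init_distributed_environment,
    initialize_model_parallel,
    all_gather,
)

# RANK and WORLD_SIZE are set by torchrun.
rank = int(os.environ["RANK"])
world_size = int(os.environ["WORLD_SIZE"])
init_distributed_environment(world_size=world_size, rank=rank)
initialize_model_parallel(sequence_parallel_degree=world_size,
                          ulysses_degree=world_size)

# Each rank contributes a (2, 4) slice; the gather stacks them along dim 0.
local = torch.full((2, 4), float(rank), device=f"npu:{rank}")
gathered = all_gather(local, dim=0)  # shape (2 * world_size, 4)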
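
Finally, preprocess_text_encoder_tokenizer_utils.py is a standalone CLI; a typical invocation (paths are placeholders) would be: python hyvideo/utils/preprocess_text_encoder_tokenizer_utils.py --input_dir /path/to/llava-llama-3-8b-v1_1-transformers --output_dir /path/to/llava-llama-3-8b-text-encoder-tokenizer. When --output_dir is omitted, the language model and tokenizer are saved to the parent directory of --input_dir.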