From 3a36a69c3f7e13b94a3b3d6a2ebd1c9a576ec668 Mon Sep 17 00:00:00 2001 From: lijian Date: Sat, 15 Nov 2025 17:13:51 +0800 Subject: [PATCH] readme clean up in whisper and cosyvoice2, modify cosyvoice dependencies and patch --- .../audio/CosyVoice2/800I/diff_CosyVoice_800I.patch | 12 +++++++----- ACL_PyTorch/built-in/audio/CosyVoice2/README.md | 6 ++++-- .../built-in/audio/CosyVoice2/requirements.txt | 5 ++--- ACL_PyTorch/built-in/audio/whisper/README.md | 2 +- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/ACL_PyTorch/built-in/audio/CosyVoice2/800I/diff_CosyVoice_800I.patch b/ACL_PyTorch/built-in/audio/CosyVoice2/800I/diff_CosyVoice_800I.patch index d5021894c1..43da5a07ac 100644 --- a/ACL_PyTorch/built-in/audio/CosyVoice2/800I/diff_CosyVoice_800I.patch +++ b/ACL_PyTorch/built-in/audio/CosyVoice2/800I/diff_CosyVoice_800I.patch @@ -1,5 +1,5 @@ diff --git a/cosyvoice/cli/cosyvoice.py b/cosyvoice/cli/cosyvoice.py -index e2d62e2..a0512a4 100644 +index e2d62e2..1929ed6 100644 --- a/cosyvoice/cli/cosyvoice.py +++ b/cosyvoice/cli/cosyvoice.py @@ -13,11 +13,15 @@ @@ -27,14 +27,16 @@ index e2d62e2..a0512a4 100644 self.instruct = True if '-Instruct' in model_dir else False self.model_dir = model_dir self.fp16 = fp16 -@@ -155,6 +159,26 @@ class CosyVoice2(CosyVoice): +@@ -155,6 +159,28 @@ class CosyVoice2(CosyVoice): self.model.load_trt('{}/flow.decoder.estimator.{}.mygpu.plan'.format(model_dir, 'fp16' if self.fp16 is True else 'fp32'), '{}/flow.decoder.estimator.fp32.onnx'.format(model_dir), self.fp16) + if load_om: + soc_version = acl.get_soc_name() + context = None -+ if '910B3' in soc_version: ++ context_required_socs = ['Ascend910B3', 'Ascend910B4'] ++ need_ctx = soc_version in context_required_socs ++ if need_ctx: + context, ret = acl.rt.get_context() + if ret: + raise RuntimeError(f"Get context failed, retcode is {ret}.") @@ -43,7 +45,7 @@ index e2d62e2..a0512a4 100644 + flow_om = InferSession(0, '{}/flow_{}_{}.om'.format(model_dir, system ,arch)) + flow_om_static = InferSession(0, '{}/flow_static.om'.format(model_dir)) + speech_om = InferSession(0, '{}/speech_{}_{}.om'.format(model_dir, system ,arch)) -+ if '910B3' in soc_version: ++ if need_ctx: + ret = acl.rt.set_context(context) + if ret: + raise RuntimeError(f"Set context failed, retcode is {ret}.") @@ -54,7 +56,7 @@ index e2d62e2..a0512a4 100644 del configs def inference_instruct(self, *args, **kwargs): -@@ -171,3 +195,19 @@ class CosyVoice2(CosyVoice): +@@ -171,3 +197,19 @@ class CosyVoice2(CosyVoice): logging.info('yield speech len {}, rtf {}'.format(speech_len, (time.time() - start_time) / speech_len)) yield model_output start_time = time.time() diff --git a/ACL_PyTorch/built-in/audio/CosyVoice2/README.md b/ACL_PyTorch/built-in/audio/CosyVoice2/README.md index dc8d2b55ce..28b01195c3 100644 --- a/ACL_PyTorch/built-in/audio/CosyVoice2/README.md +++ b/ACL_PyTorch/built-in/audio/CosyVoice2/README.md @@ -55,7 +55,7 @@ cd ModelZoo-PyTorch/ACL_PyTorch/built-in/audio/CosyVoice2 cd CosyVoice git reset --hard fd45708 git submodule update --init --recursive - # 根据当前使用机型,叠加patch。如果当前使用机型为313T 800T A2,和800I共用patch文件 + # 根据当前使用机型,叠加patch。如果当前使用机型为313T 800T A2,300I A2, 和800I共用patch文件 git apply ../${platform}/diff_CosyVoice_${platform}.patch # 将infer.py复制到CosyVoice中 cp ../infer.py ./ @@ -89,9 +89,11 @@ cd ModelZoo-PyTorch/ACL_PyTorch/built-in/audio/CosyVoice2 2. 安装依赖 ``` pip3 install -r ../requirements.txt + pip3 install tokenizers==0.14.0 + pip3 install huggingface-hub==0.23.5 apt-get install sox # centos版本 yum install sox ``` - 注:如果遇到无法安装WeTextProcessing的场景,例如提示安装pyinit报错,可以参考以下方法手动安装编译 + 手动安装编译WeTextProcessing ```bash # 下载安装包并解压 wget https://www.openfst.org/twiki/pub/FST/FstDownload/openfst-1.8.3.tar.gz diff --git a/ACL_PyTorch/built-in/audio/CosyVoice2/requirements.txt b/ACL_PyTorch/built-in/audio/CosyVoice2/requirements.txt index 10cde21086..03aecef40e 100644 --- a/ACL_PyTorch/built-in/audio/CosyVoice2/requirements.txt +++ b/ACL_PyTorch/built-in/audio/CosyVoice2/requirements.txt @@ -12,7 +12,7 @@ inflect==7.3.1 librosa==0.10.2 lightning==2.2.4 matplotlib==3.7.5 -modelscope==1.14.0 +modelscope==1.15.0 networkx==3.1 omegaconf==2.3.0 onnx==1.16.0 @@ -30,5 +30,4 @@ torchaudio==2.3.1 uvicorn==0.30.0 wget==3.2 fastapi==0.111.0 -fastapi-cli==0.0.4 -WeTextProcessing==1.0.4.1 \ No newline at end of file +fastapi-cli==0.0.4 \ No newline at end of file diff --git a/ACL_PyTorch/built-in/audio/whisper/README.md b/ACL_PyTorch/built-in/audio/whisper/README.md index fd6a094b9b..651f99a155 100644 --- a/ACL_PyTorch/built-in/audio/whisper/README.md +++ b/ACL_PyTorch/built-in/audio/whisper/README.md @@ -23,7 +23,7 @@ Whisper 是 OpenAI 开源的通用语音识别模型,支持多语言转录和 | Python | 3.10 | - | | PyTorch | 2.5.1 | - | | Ascend Extension PyTorch | 2.5.1 | - | - | 说明:Atlas 800I A2 推理卡和Atlas 300I DUO 推理卡请以CANN版本选择实际固件与驱动版本。 | \ | \ | + | 说明:仅支持Atlas 800I A2,不支持300I。 | \ | \ | ## 获取本仓源码 -- Gitee