From cbe533f5c9fdbecf5a36a8a2a0b344099f53f446 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E4=BA=A6=E8=88=9F?= Date: Wed, 20 Aug 2025 10:50:08 +0800 Subject: [PATCH 1/7] modify the readme and error description --- ACL_PyTorch/built-in/cv/GroundingDINO/README.md | 2 +- ACL_PyTorch/built-in/cv/GroundingDINO/video_demo_npu.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ACL_PyTorch/built-in/cv/GroundingDINO/README.md b/ACL_PyTorch/built-in/cv/GroundingDINO/README.md index f0f418ba3c..b4e3bb1cf6 100644 --- a/ACL_PyTorch/built-in/cv/GroundingDINO/README.md +++ b/ACL_PyTorch/built-in/cv/GroundingDINO/README.md @@ -120,7 +120,7 @@ python demo/image_demo_npu.py images/animals.png configs/mm_grounding_dino/grounding_dino_swin-b_pretrain_obj365_goldg_v3det.py --weight weights/grounding_dino_swin-b_pretrain_obj365_goldg_v3de-f83eef00.pth --texts '$: coco' --device npu (--loop 10) # 执行视频推理命令 - python demo/video_demo_npu.py demo/demo_mot.mp4 configs/mm_grounding_dino/grounding_dino_swin-b_pretrain_obj365_goldg_v3det.py weights/grounding_dino_swin-b_pretrain_obj365_goldg_v3de-f83eef00.pth (--batch_size 16) + python demo/video_demo_npu.py demo/demo.mp4 configs/mm_grounding_dino/grounding_dino_swin-b_pretrain_obj365_goldg_v3det.py weights/grounding_dino_swin-b_pretrain_obj365_goldg_v3de-f83eef00.pth (--batch_size 16) ``` 在推理开始后,首先会默认执行warm_up,目的是执行首次编译,首次编译时间较长,在warm_up结束后,会执行推理操作,并打屏计算结果和性能数据。 diff --git a/ACL_PyTorch/built-in/cv/GroundingDINO/video_demo_npu.py b/ACL_PyTorch/built-in/cv/GroundingDINO/video_demo_npu.py index aa081657c3..fac489b983 100644 --- a/ACL_PyTorch/built-in/cv/GroundingDINO/video_demo_npu.py +++ b/ACL_PyTorch/built-in/cv/GroundingDINO/video_demo_npu.py @@ -207,7 +207,7 @@ def main(): video_tools = init_video_tools(args, model) video_reader = video_tools.get("video_reader") if len(video_reader) < batch_size: - raise AssertionError(f"batch_size must be greater than video frame len, " + raise AssertionError(f"batch_size must be 
less than video frame len, " f"now frame len: {len(video_reader)}, batch_size: {batch_size}") # tokenizer -- Gitee From 36157e1d6a4acf9b90619535947b54ee366eacab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E4=BA=A6=E8=88=9F?= Date: Wed, 20 Aug 2025 11:18:16 +0800 Subject: [PATCH 2/7] modify the readme and error description --- ACL_PyTorch/built-in/cv/GroundingDINO/video_demo_npu.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ACL_PyTorch/built-in/cv/GroundingDINO/video_demo_npu.py b/ACL_PyTorch/built-in/cv/GroundingDINO/video_demo_npu.py index fac489b983..fea83bbf08 100644 --- a/ACL_PyTorch/built-in/cv/GroundingDINO/video_demo_npu.py +++ b/ACL_PyTorch/built-in/cv/GroundingDINO/video_demo_npu.py @@ -206,8 +206,8 @@ def main(): # init video tools video_tools = init_video_tools(args, model) video_reader = video_tools.get("video_reader") - if len(video_reader) < batch_size: - raise AssertionError(f"batch_size must be less than video frame len, " + if len(video_reader) <= batch_size: + raise AssertionError(f"video frame len must be greater than batch_size, " f"now frame len: {len(video_reader)}, batch_size: {batch_size}") # tokenizer -- Gitee From 15921f2add4e9312ebf490f52d0f22f45776ae7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E4=BA=A6=E8=88=9F?= Date: Wed, 20 Aug 2025 11:33:56 +0800 Subject: [PATCH 3/7] modify wav2lip readme --- ACL_PyTorch/contrib/audio/wav2lip_ID100400/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ACL_PyTorch/contrib/audio/wav2lip_ID100400/README.md b/ACL_PyTorch/contrib/audio/wav2lip_ID100400/README.md index bfe51c10d1..9fb4906604 100644 --- a/ACL_PyTorch/contrib/audio/wav2lip_ID100400/README.md +++ b/ACL_PyTorch/contrib/audio/wav2lip_ID100400/README.md @@ -211,7 +211,7 @@ 1. 
安装ais_bench推理工具。 - 请访问[ais_bench推理工具](https://gitee.com/ascend/tools/tree/master/ais-bench_workload/tool/ais_infer)代码仓,根据readme文档进行工具安装。 + 请访问[ais_bench推理工具](https://gitee.com/ascend/tools/tree/master/ais-bench_workload/tool/ais_bench)代码仓,根据readme文档进行工具安装。 2. 执行推理。 ``` -- Gitee From 299687de153eb53cfc3f270a436f5a67a01957c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E4=BA=A6=E8=88=9F?= Date: Wed, 20 Aug 2025 11:37:24 +0800 Subject: [PATCH 4/7] modify --- ACL_PyTorch/built-in/cv/GroundingDINO/video_demo_npu.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ACL_PyTorch/built-in/cv/GroundingDINO/video_demo_npu.py b/ACL_PyTorch/built-in/cv/GroundingDINO/video_demo_npu.py index fea83bbf08..f75a18229c 100644 --- a/ACL_PyTorch/built-in/cv/GroundingDINO/video_demo_npu.py +++ b/ACL_PyTorch/built-in/cv/GroundingDINO/video_demo_npu.py @@ -206,8 +206,8 @@ def main(): # init video tools video_tools = init_video_tools(args, model) video_reader = video_tools.get("video_reader") - if len(video_reader) <= batch_size: - raise AssertionError(f"video frame len must be greater than batch_size, " + if len(video_reader) < batch_size: + raise AssertionError(f"video frame len cannot be less than batch_size, " f"now frame len: {len(video_reader)}, batch_size: {batch_size}") # tokenizer -- Gitee From c6ea9a99ef65e7fbfaba7e6ff8e22d4ae71734ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E4=BA=A6=E8=88=9F?= Date: Fri, 29 Aug 2025 16:49:20 +0800 Subject: [PATCH 5/7] modify test indicator --- ACL_PyTorch/built-in/audio/whisper/README.md | 22 +++----------------- ACL_PyTorch/built-in/audio/whisper/infer.py | 9 ++++++++ 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/ACL_PyTorch/built-in/audio/whisper/README.md b/ACL_PyTorch/built-in/audio/whisper/README.md index 0e2b164d03..8f634b0e69 100644 --- a/ACL_PyTorch/built-in/audio/whisper/README.md +++ b/ACL_PyTorch/built-in/audio/whisper/README.md @@ -95,27 +95,11 @@ infer.py推理参数: 
warmup结束之后,开始推理librispeech_asr_dummy数据集,推理过程中会打屏输出E2E性能,推理结束后会输出WER精度得分。 -**如果你想推理过程中打印encode和decode的耗时,你可以执行以下命令:** -```SHELL -# 1. 找到当前的环境路径(简称${location}),Location后面的那一串就是当前环境路径 -pip show openai-whisper | grep Location -# 2. 记录当前whisper库decoding.py的文件路径 -${decoding_path} = ${location}/whisper/decoding.py -# 3. 执行patch文件 -patch -p1 < whisper_decoding.patch -# 可能会提示你 -# cant find file to patch at input line 3 -# ... -# File to patch: -# 这时候需要你手动指定文件路径,输入之前得到的 -${decoding_path} -# 按回车,提示 patching file ${decoding_path} 即成功 -``` ## 性能数据 在librispeech_asr_dummy/clean数据集上的性能如下: - | 模型 | 芯片 | 平均encode | 平均decode |平均E2E | + | 模型 | 芯片 | 转录比QPS | |---------|------------|----------|-----------------|---------| - | whisper | 800I A2 | 0.90ms | 3.25ms | 67.32ms | - 注:平均decode 指在decode阶段,生成单个token的平均耗时。 \ No newline at end of file + | whisper | 800I A2 | 39.33 | + 注:转录比表示1s能处理的音频长度 \ No newline at end of file diff --git a/ACL_PyTorch/built-in/audio/whisper/infer.py b/ACL_PyTorch/built-in/audio/whisper/infer.py index ba5da6fa13..066a46d3fd 100644 --- a/ACL_PyTorch/built-in/audio/whisper/infer.py +++ b/ACL_PyTorch/built-in/audio/whisper/infer.py @@ -17,6 +17,7 @@ import jiwer import numpy as np import pandas as pd from datasets import load_dataset +import librosa import torch from torch import nn, Tensor @@ -279,6 +280,12 @@ if __name__ == '__main__': npu_backend = tng.get_npu_backend(compiler_config=config) dataset = LibriSpeechDataset(wsp_args.speech_path, device=device) + audios = load_dataset(wsp_args.speech_path, split="validation") + duration_seconds = 0 + for audio in audios: + y, audio_sr = audio["audio"]["array"], audio["audio"]["sampling_rate"] + duration_seconds += librosa.get_duration(y=y, sr=audio_sr) + loader = torch.utils.data.DataLoader(dataset, batch_size=wsp_args.batch_size) options = whisper.DecodingOptions(language='en', without_timestamps=True, fp16=True) @@ -300,5 +307,7 @@ if __name__ == '__main__': print("{}/{} - {}".format(_step, wsp_args.warmup, 
result[bs].text)) print("LibriSpeech infer, English to English TRANSCRIBE ...") + start_time = time.time() p_wer = libri_speech_infer(wsp_model, options, loader) + print(f"QPS: {duration_seconds/(time.time()-start_time):.2f}") print(f"LibriSpeech infer WER score = {p_wer * 100:.2f} %") -- Gitee From c1b8ae41a493fb28c6b8c1ebf50fbc0aff65108e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E4=BA=A6=E8=88=9F?= Date: Fri, 29 Aug 2025 16:54:55 +0800 Subject: [PATCH 6/7] modify whisper README performance note --- ACL_PyTorch/built-in/audio/whisper/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ACL_PyTorch/built-in/audio/whisper/README.md b/ACL_PyTorch/built-in/audio/whisper/README.md index 8f634b0e69..1d60224c94 100644 --- a/ACL_PyTorch/built-in/audio/whisper/README.md +++ b/ACL_PyTorch/built-in/audio/whisper/README.md @@ -102,4 +102,4 @@ warmup结束之后,开始推理librispeech_asr_dummy数据集,推理过程 | 模型 | 芯片 | 转录比QPS | |---------|------------|----------|-----------------|---------| | whisper | 800I A2 | 39.33 | - 注:转录比表示1s能处理的音频长度 \ No newline at end of file + 注:转录比表示1s能处理的音频长度,多次运行取平均 \ No newline at end of file -- Gitee From 0d14cf331d38138211e9524293a4454ef6e8eb07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B9=E4=BA=A6=E8=88=9F?= Date: Fri, 29 Aug 2025 17:12:00 +0800 Subject: [PATCH 7/7] modify test indicator --- ACL_PyTorch/built-in/audio/whisper/README.md | 8 ++++---- ACL_PyTorch/built-in/audio/whisper/infer.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ACL_PyTorch/built-in/audio/whisper/README.md b/ACL_PyTorch/built-in/audio/whisper/README.md index 1d60224c94..b7217d973c 100644 --- a/ACL_PyTorch/built-in/audio/whisper/README.md +++ b/ACL_PyTorch/built-in/audio/whisper/README.md @@ -99,7 +99,7 @@ warmup结束之后,开始推理librispeech_asr_dummy数据集,推理过程 ## 性能数据 在librispeech_asr_dummy/clean数据集上的性能如下: - | 模型 | 芯片 | 转录比QPS | - |---------|------------|----------|-----------------|---------| - | whisper | 800I A2 | 39.33 | - 注:转录比表示1s能处理的音频长度,多次运行取平均 \ No newline at end of 
file + | 模型 | 芯片 | RTF | + |---------|------------|----------| + | whisper | 800I A2 | 0.0236 | + 注:RTF表示转录一段音频所需的时间与音频实际长度的比值,多次运行取平均 \ No newline at end of file diff --git a/ACL_PyTorch/built-in/audio/whisper/infer.py b/ACL_PyTorch/built-in/audio/whisper/infer.py index 066a46d3fd..0617aa7eb0 100644 --- a/ACL_PyTorch/built-in/audio/whisper/infer.py +++ b/ACL_PyTorch/built-in/audio/whisper/infer.py @@ -309,5 +309,5 @@ if __name__ == '__main__': print("LibriSpeech infer, English to English TRANSCRIBE ...") start_time = time.time() p_wer = libri_speech_infer(wsp_model, options, loader) - print(f"QPS: {duration_seconds/(time.time()-start_time):.2f}") + print(f"RTF: {(time.time()-start_time)/duration_seconds:.4f}") print(f"LibriSpeech infer WER score = {p_wer * 100:.2f} %") -- Gitee