1 Star 2 Fork 0

ppnt/whisper-cpp-server

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
websocket_client.py 7.44 KB
一键复制 编辑 原始数据 按行查看 历史
李通 提交于 2年前 . update websocket_client.py
# python3 websocket_client.py --server_ip 127.0.0.1 --port 8090 --wavfile ./zh.wav
import argparse
import asyncio
import codecs
import functools
import json
import logging
import os
import time
import numpy as np
import soundfile
import websockets
class Logger(object):
def __init__(self, name: str = None):
name = 'PaddleSpeech' if not name else name
self.logger = logging.getLogger(name)
log_config = {
'DEBUG': 10,
'INFO': 20,
'TRAIN': 21,
'EVAL': 22,
'WARNING': 30,
'ERROR': 40,
'CRITICAL': 50,
'EXCEPTION': 100,
}
for key, level in log_config.items():
logging.addLevelName(level, key)
if key == 'EXCEPTION':
self.__dict__[key.lower()] = self.logger.exception
else:
self.__dict__[key.lower()] = functools.partial(self.__call__,
level)
self.format = logging.Formatter(
fmt='[%(asctime)-15s] [%(levelname)8s] - %(message)s')
self.handler = logging.StreamHandler()
self.handler.setFormatter(self.format)
self.logger.addHandler(self.handler)
self.logger.setLevel(logging.INFO)
self.logger.propagate = False
def __call__(self, log_level: str, msg: str):
self.logger.log(log_level, msg)
class ASRWsAudioHandler:
def __init__(self,
logger=None,
url=None,
port=None,
endpoint="/paddlespeech/asr/streaming", ):
"""Online ASR Server Client audio handler
Online asr server use the websocket protocal
Args:
url (str, optional): the server ip. Defaults to None.
port (int, optional): the server port. Defaults to None.
endpoint(str, optional): to compatiable with python server and c++ server.
"""
self.url = url
self.port = port
self.logger = logger
if url is None or port is None or endpoint is None:
self.url = None
else:
self.url = "ws://" + self.url + ":" + str(self.port) + endpoint
self.logger.info(f"endpoint: {self.url}")
def read_wave(self, wavfile_path: str):
"""read the audio file from specific wavfile path
Args:
wavfile_path (str): the audio wavfile,
we assume that audio sample rate matches the model
Yields:
numpy.array: the samall package audio pcm data
"""
samples, sample_rate = soundfile.read(wavfile_path, dtype='int16')
x_len = len(samples)
assert sample_rate == 16000
chunk_size = int(85 * sample_rate / 1000) # 85ms, sample_rate = 16kHz
if x_len % chunk_size != 0:
padding_len_x = chunk_size - x_len % chunk_size
else:
padding_len_x = 0
padding = np.zeros((padding_len_x), dtype=samples.dtype)
padded_x = np.concatenate([samples, padding], axis=0)
assert (x_len + padding_len_x) % chunk_size == 0
num_chunk = (x_len + padding_len_x) / chunk_size
num_chunk = int(num_chunk)
for i in range(0, num_chunk):
start = i * chunk_size
end = start + chunk_size
x_chunk = padded_x[start:end]
yield x_chunk
async def run(self, wavfile_path: str):
"""Send a audio file to online server
Args:
wavfile_path (str): audio path
Returns:
str: the final asr result
"""
logging.debug("send a message to the server")
results = []
if self.url is None:
self.logger.error("No asr server, please input valid ip and port")
return results
# 1. send websocket handshake protocal
start_time = time.time()
async with websockets.connect(self.url) as ws:
# 2. server has already received handshake protocal
# client start to send the command
audio_info = json.dumps(
{
"name": "test.wav",
"signal": "start",
"nbest": 1
},
sort_keys=True,
indent=4,
separators=(',', ': '))
await ws.send(audio_info)
msg = await ws.recv()
self.logger.info("client receive msg={}".format(msg))
# 3. send chunk audio data to engine
for chunk_data in self.read_wave(wavfile_path):
await ws.send(chunk_data.tobytes())
msg = await ws.recv()
if msg:
try:
json_object = json.loads(msg)
self.logger.info("client receive msg={}".format(json_object))
if "result" in json_object:
result = json_object.get("result")
for sentence in result:
# print(type(sentence))
sentence = "{}->{}:{}".format(sentence['t0'], sentence['t1'], sentence['sentence'])
results.append(sentence)
# print(sentence)
except Exception as e:
self.logger.error("Unexpected error: {}".format(e))
# 4. we must send finished signal to the server
audio_info = json.dumps(
{
"name": "test.wav",
"signal": "end",
"nbest": 1
},
sort_keys=True,
indent=4,
separators=(',', ': '))
await ws.send(audio_info)
msg = await ws.recv()
# 5. decode the bytes to str
json_object = json.loads(msg)
# 6. logging the final result and comptute the statstics
elapsed_time = time.time() - start_time
audio_info = soundfile.info(wavfile_path)
self.logger.info("client final receive msg={}".format(json_object))
# print(type(json_object))
if "result" in json_object:
result = json_object.get("result")
for sentence in result:
# print(type(sentence))
sentence = "{}->{}:{}".format(sentence['t0'], sentence['t1'], sentence['sentence'])
results.append(sentence)
# print(sentence)
self.logger.info(
f"audio duration: {audio_info.duration}, elapsed time: {elapsed_time}, RTF={elapsed_time / audio_info.duration}"
)
return results
logger = Logger()
def main(args):
logger.info("asr websocket client start")
handler = ASRWsAudioHandler(
logger,
args.server_ip,
args.port,
endpoint=args.endpoint)
loop = asyncio.get_event_loop()
# support to process single audio file
if args.wavfile and os.path.exists(args.wavfile):
logger.info(f"start to process the wavscp: {args.wavfile}")
results = loop.run_until_complete(handler.run(args.wavfile))
if results:
for sentence in results:
print(sentence)
# support to process batch audios from wav.scp
if args.wavscp and os.path.exists(args.wavscp):
logger.info(f"start to process the wavscp: {args.wavscp}")
with codecs.open(args.wavscp, 'r', encoding='utf-8') as f, \
codecs.open("result.txt", 'w', encoding='utf-8') as w:
for line in f:
utt_name, utt_path = line.strip().split()
result = loop.run_until_complete(handler.run(utt_path))
result = result["result"]
w.write(f"{utt_name} {result}\n")
if __name__ == "__main__":
logger.info("Start to do streaming asr client")
parser = argparse.ArgumentParser()
parser.add_argument(
'--server_ip', type=str, default='127.0.0.1', help='server ip')
parser.add_argument('--port', type=int, default=8090, help='server port')
parser.add_argument(
"--endpoint",
type=str,
default="/paddlespeech/asr/streaming",
help="ASR websocket endpoint")
parser.add_argument(
"--wavfile",
action="store",
help="wav file path ",
default="./16_audio.wav")
parser.add_argument(
"--wavscp", type=str, default=None, help="The batch audios dict text")
args = parser.parse_args()
main(args)
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
HTML
1
https://gitee.com/ppnt/whisper-cpp-server.git
git@gitee.com:ppnt/whisper-cpp-server.git
ppnt
whisper-cpp-server
whisper-cpp-server
main

搜索帮助