2 Star 29 Fork 7

李子 / Sound_File_Processing

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
long_file_cut_by_srt.py 3.30 KB
一键复制 编辑 原始数据 按行查看 历史
高级特工 提交于 2022-01-08 08:43 . update long_file_cut_by_srt.py.
import os
import time
from pydub import AudioSegment
import short_file_plicing
from utils.file_io import file_r, listdir, file_w
def open_sound_file(file_path):
    """Load an audio file with pydub, picking the decoder from the file extension.

    The extension is the text after the last "." in ``file_path`` and is
    compared case-insensitively, so "a.MP3" and "a.mp3" are treated alike.

    Args:
        file_path: Path of the audio file to open.

    Returns:
        The object returned by ``AudioSegment.from_mp3`` / ``from_wav`` for
        "mp3" / "wav" files, or ``None`` when the extension is anything else.

    Raises:
        Whatever the pydub loader raises when the file cannot be read or
        decoded (callers in this file rely on that to probe for files).
    """
    # rpartition keeps the original "text after the last dot" semantics,
    # including paths that contain no dot at all.
    file_type = file_path.rpartition(".")[2].lower()
    if file_type == "mp3":
        return AudioSegment.from_mp3(file_path)
    if file_type == "wav":
        return AudioSegment.from_wav(file_path)
    # Unsupported extension: keep the historical "no result" return value,
    # but make it explicit instead of falling off the end of the function.
    return None
def _srt_timestamp_to_ms(stamp):
    """Convert one "HH:MM:SS,mmm" timestamp into integer milliseconds."""
    # Some subtitle tools write "." instead of "," before the milliseconds;
    # normalise so both spellings parse the same way.
    fields = stamp.strip().replace(".", ",").split(",")
    hours, minutes, seconds = fields[0].split(":")
    return (
        int(hours) * 3600000
        + int(minutes) * 60000
        + int(seconds) * 1000
        + int(fields[1])
    )


def srt_time_to_mstime(srt_str):
    """Parse an SRT timing line into start and end offsets in milliseconds.

    Args:
        srt_str: A timing line such as "00:00:01,500 --> 00:00:04,000".
            Whitespace around the arrow and around each timestamp is
            ignored, and "." is accepted as the millisecond separator.

    Returns:
        A ``(start_time, end_time)`` tuple of ints, both in milliseconds.

    Raises:
        IndexError: If the arrow or the millisecond part is missing.
        ValueError: If a timestamp is not made of three ":"-separated
            integer fields plus an integer millisecond field.
    """
    parts = srt_str.split("-->")
    return _srt_timestamp_to_ms(parts[0]), _srt_timestamp_to_ms(parts[1])
def cut_file_by_srt(sound_root_path, srt_dict):
    """Cut each long recording into one clip per subtitle cue and log the text.

    For every name in ``srt_dict`` the audio is looked up as
    ``<sound_root_path>/input/<name>.wav`` and then ``.mp3``. Each cue is
    exported as ``<name>_<cue index>.wav`` into ``<sound_root_path>/output/``
    or, for cues considered too short, ``<sound_root_path>/output_short/``.
    A matching line "<clip name> <text without spaces>" is passed to
    ``file_w`` for ``transcript.txt`` / ``transcript_short.txt`` with mode "a".

    Reads the module-level names ``min_length``, ``min_text`` and
    ``transcript_root_path``; they must be defined before this is called.

    Args:
        sound_root_path: Directory holding the ``input`` folder; the output
            folders are created beneath it.
        srt_dict: Maps a recording name to its cues, each cue being a list of
            lines ``[index, "start --> end", text, ...]``.
    """
    for file_name in srt_dict:
        # Reset per recording so a failed lookup can never reuse the audio
        # that was loaded for the previous recording.
        sound = None
        for file_type in [".wav", ".mp3"]:
            sound_path = sound_root_path + "/input/" + file_name + file_type
            print(sound_path)
            try:
                sound = open_sound_file(sound_path)
            except Exception:
                # Probing: a missing or undecodable candidate just means we
                # try the next extension. (Exception, not a bare except, so
                # Ctrl-C still interrupts the run.)
                continue
            if sound is not None:
                break
        if sound is None:
            print("skip " + file_name + ": no readable .wav/.mp3 in "
                  + sound_root_path + "/input/")
            continue

        for line in srt_dict[file_name]:
            # A usable cue needs index, timing and text lines; blank trailing
            # chunks and text-less cues are ignored.
            if len(line) < 3:
                continue
            start_time, end_time = srt_time_to_mstime(line[1])
            # NOTE(review): only the first text line of a cue is used; any
            # further lines (line[3:]) are ignored, as before.
            text = line[2].replace(" ", "")
            too_short = end_time - start_time <= min_length or len(text) < min_text
            short_str = "_short" if too_short else ""

            # The NetEase cuts are too aggressive, so shift the window 50 ms
            # later; a cue starting at 0 keeps its start and only gains the
            # extra 50 ms at the end.
            clip_start = start_time + 50 if start_time else start_time
            sound_cut = sound[clip_start:end_time + 50]

            output_dir = sound_root_path + "/output" + short_str + "/"
            os.makedirs(output_dir, exist_ok=True)
            clip_name = file_name + "_" + str(line[0])
            sound_cut.export(output_dir + clip_name + ".wav", format="wav")
            file_w(transcript_root_path + "/transcript" + short_str + ".txt",
                   clip_name + " " + text + "\n",
                   "a")
if __name__ == '__main__':
    # Folder layout used by this script.
    srt_root_path = "srt"
    sound_root_path = "wav"
    transcript_root_path = "transcript"

    # Thresholds read by cut_file_by_srt.
    min_length = 100  # clips whose duration (ms) is at most this go to the "short" folder
    min_text = 6  # clips whose text is shorter than this go to the "short" folder

    srt_file_list = []
    srt_dict = {}
    # listdir is called with the list as an argument; the loop below then
    # iterates that same list (presumably filled in place -- confirm in utils.file_io).
    listdir(srt_root_path, srt_file_list)

    for srt_path in srt_file_list:
        # Key: file name up to its first ".", with any "CHS_" marker removed.
        base_name = srt_path.rsplit("/", 1)[-1]
        srt_name = base_name.split(".", 1)[0].replace("CHS_", "")
        print(srt_path)
        srt_string = file_r(srt_path)
        # Cues are separated by a blank line; each cue becomes a list of its lines.
        srt_dict[srt_name] = [cue.split("\n") for cue in srt_string.split("\n\n")]

    cut_file_by_srt(sound_root_path, srt_dict)

    is_continue = input("输入1后按回车键自动拼接短音频,输入2停止程序(注意:请先修改short_file_plicing中的参数)")
    wants_splicing = str(is_continue) == "1"
    print(wants_splicing)
    if wants_splicing:
        short_file_plicing.main()
Python
1
https://gitee.com/kslizi/Sound_File_Processing.git
git@gitee.com:kslizi/Sound_File_Processing.git
kslizi
Sound_File_Processing
Sound_File_Processing
master

搜索帮助