代码拉取完成,页面将自动刷新
# -*- coding: utf-8 -*-
import os
import glob
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
def main(data_dir="./data/finvcup9th_1st_ds4", test_size=0.2, random_state=42):
    """Build the train / validation / test CSV manifests for a wav dataset.

    Reads ``<data_dir>/train_label.txt`` (one ``wav_name,label`` pair per
    line), prefixes every wav name with ``<data_dir>/train/``, splits the
    rows into a training and a validation subset and writes each subset to a
    CSV file inside ``data_dir``. It then lists ``<data_dir>/test/*.wav`` and
    writes those paths to a third CSV file.

    With the default arguments the input and output locations are the same
    ones that were previously hard-coded.

    Args:
        data_dir: Dataset root directory. Its last path component is used as
            the prefix of the generated CSV file names.
        test_size: Passed to ``train_test_split`` as ``test_size``.
        random_state: Passed to ``train_test_split`` as ``random_state``.

    Raises:
        ValueError: If a non-blank line of the label file does not consist
            of exactly two comma-separated fields.
    """
    # "./data/finvcup9th_1st_ds4" -> "finvcup9th_1st_ds4"
    dataset_name = os.path.basename(os.path.normpath(data_dir))
    label_path = data_dir + "/train_label.txt"

    rows = []
    # Explicit encoding so decoding does not depend on the platform default.
    with open(label_path, "r", encoding="utf-8") as f:
        for line_no, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line:
                # Blank lines would otherwise become rows with an empty
                # wav path and a missing label.
                continue
            fields = line.split(",")
            if len(fields) != 2:
                raise ValueError(
                    "{}:{}: expected 'wav_path,label', got {!r}".format(
                        label_path, line_no, line
                    )
                )
            rows.append(fields)

    df_data = pd.DataFrame(rows, columns=["wav_path", "label"])
    df_data["wav_path"] = data_dir + "/train/" + df_data["wav_path"]

    # Split into training and validation sets.
    X_train, X_valid, y_train, y_valid = train_test_split(
        df_data["wav_path"].values,
        df_data["label"].values,
        test_size=test_size,
        random_state=random_state,
    )
    df_train = pd.DataFrame({"wav_path": X_train, "label": y_train})
    df_valid = pd.DataFrame({"wav_path": X_valid, "label": y_valid})
    df_train.to_csv(
        "{}/{}_train_data.csv".format(data_dir, dataset_name), index=False
    )
    df_valid.to_csv(
        "{}/{}_valid_data.csv".format(data_dir, dataset_name), index=False
    )

    # Build the test set listing. glob returns matches in arbitrary order,
    # so sort them to make the generated CSV reproducible across runs.
    test_speeches = sorted(glob.glob(os.path.join(data_dir + "/test", "*.wav")))
    df_test = pd.DataFrame(test_speeches, columns=["wav_path"])
    df_test.to_csv(
        "{}/{}_test_data.csv".format(data_dir, dataset_name), index=False
    )
    print("done!")
# Run the manifest generation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。