From dd70fbbd727d4cae3ac413b3065c2ec00b7216b8 Mon Sep 17 00:00:00 2001 From: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> Date: Tue, 19 Nov 2024 10:13:16 +0000 Subject: [PATCH] update README.md. Signed-off-by: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> --- README.md | 308 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 254 insertions(+), 54 deletions(-) diff --git a/README.md b/README.md index 73b8f19..462ce15 100644 --- a/README.md +++ b/README.md @@ -286,7 +286,10 @@ python StreamLearn/tests/test_ODS.py --data PATH_TO_DATA --checkpoint PATH_TO_CH - StreamLearn/Algorithm/Algorithm_SAFC/SAFC_Stream_Funcs.py -首先,获取算法所需数据集: +首先,根据代码配置文件StreamLearn/Algorithm/Algorithm_SAFC/request_import.py安装库完成环境配置 + + +然后,获取算法所需数据集: 地址: 通过百度网盘分享的文件:SAFC_datasets_CIFAR10.zip @@ -296,70 +299,267 @@ python StreamLearn/tests/test_ODS.py --data PATH_TO_DATA --checkpoint PATH_TO_CH 说明: 本实验需要用到的数据集是CIFAR10,本例将data_batch_1.mat至data_batch_4.mat中的数据作为第一阶段数据;data_batch_2.mat中的数据作为第二阶段的数据;test_batch.mat中的数据是测试数据 - -然后,根据代码配置文件StreamLearn/Algorithm/Algorithm_SAFC/request_import.py安装库完成环境配置 +流式数据读取: +```python +# 数据接口 +def readtopython(path): + data_part=loadmat(path) + X_part=data_part["data"].astype(np.float64) + Y_part=data_part["labels"].astype(np.float64) + return X_part,Y_part + +# Batch数据接口 +def readbatchtoPython(Path): + for path_index in range(len(Path)): + path=Path[path_index] + X_part,Y_part=readtopython(path) + if path_index==0: + X_past_original=X_part + Y_past_original=Y_part + if path_index!=0: + X_past_original=np.vstack((X_past_original,X_part)) + Y_past_original=np.vstack((Y_past_original,Y_part)) + # 第一阶段的数据没有No.9,即第10类数据 + Position_past=np.where(Y_past_original!=9)[0] + Y_past=Y_past_original[Position_past] + # 特征不含B通道的1024维特征 + X_past=X_past_original[Position_past,0:2048] + return X_past,Y_past +``` 其次,调用SAFC算法进行训练: + SAFC训练基于第一阶段复用的SVM模型,因此先训练SVM,并保存训练好的SVM模型: ```python -print('####eval####') -print("begin svm1 training!") -print('####eval####') -svm1 = SVC(probability=True,kernel="linear",decision_function_shape='ovo') -svm1.fit(data_s1.tolist(), label_s1_vec.tolist()) -dump(svm1, save_dir+'/svm1.model') -print('####eval####') -print("end svm1 training!") -print('####eval####') - -# 复用SVM,训练SAFC_D,并保存训练后的模型: -print('####eval####') -print("begin SAFC_D training!") -print('####eval####') -w_ours1 = SAFC_D(np.mat(svm1._get_coef()), np.transpose(data_s2), label_s2, alpha_best1, beta_best1, eta) -dump(w_ours1, save_dir+'/SAFC_D.model') - -# 复用SVM,训练SAFC_ID,并保存训练后的模型: -print('####eval####') -print("begin SAFC_ID training!") -print('####eval####') -w_ours2 = SAFC_ID(np.mat(svm1._get_coef()),np.transpose(data_s2),label_s2,alpha_best2,beta_best2,eta) -dump(w_ours2, save_dir+'/SAFC_ID.model') +def SAFC_Stage1(PathSet,save_dir): + # 第一阶段数据读取与模型训练 + print('####eval####') + print("开始读取第一阶段数据!") + print('####eval####') + + X_past,Y_past=readbatchtoPython(PathSet) + X_past,Y_past=datareconsrtuct(X_past,Y_past) + Y_past=Y_past+1 + + print('####eval####') + print("第一阶段数据读取完成!") + print('####eval####') + + # Given and transform + data_s1=X_past + label_s1_vec=Y_past + label_s1 = ind2vec (Y_past.T, len(np.unique(Y_past))) + + # 归一化 + print('####eval####') + print("第一阶段数据归一化!") + print('####eval####') + scaler = StandardScaler() + data_s1 = scaler.fit_transform(data_s1) + print('####eval####') + print("第一阶段数据归一化完成!") + print('####eval####') + + # 第一阶段数据特征输出 + print('####eval####') + print("第一阶段数据:数据量:{},特征维度:{},类别数:{}".format(np.shape(data_s1)[0], np.shape(data_s1)[1], len(np.unique(Y_past)))) + print('####eval####') + + # 第一阶段模型训练与存储,用SVM训练第一阶段模型 + print('####eval####') + print("begin svm1 training!") + print('####eval####') + svm1 = LinearSVC() + svm1.fit(data_s1.tolist(), label_s1_vec.tolist()) + dump(svm1, save_dir+'/svm1.model') + print('####eval####') + print("end svm1 training!") + print('####eval####') + + return +``` + +复用SVM,在第二阶段训练SAFC_D和SAFC_ID两个变体,并保存训练后的模型: +```python +def SAFC_Stage2(new_path,svm1,save_dir): + # Paras + eta = 0.1 + alpha_set = [10 ** -2, 10 ** -1, 10 ** 0] + alpha = 0.001 + beta = 0.01 + + # 第二阶段数据读取与模型训练 + print('####eval####') + print("开始读取第二阶段数据!") + print('####eval####') + + X_new,Y_new=readtopython(new_path) + X_new,Y_new=datareconsrtuct(X_new,Y_new) + Y_new=Y_new+1 + + print('####eval####') + print("第二阶段数据读取完成!") + print('####eval####') + + # Given and transform + data_s2=X_new + label_s2_vec=Y_new + label_s2 = ind2vec (Y_new.T, len(np.unique(Y_new))) + + # 归一化 + print('####eval####') + print("第二阶段数据归一化!") + print('####eval####') + scaler = StandardScaler() + data_s2 = scaler.fit_transform(data_s2) + print('####eval####') + print("第二阶段数据归一化完成!") + print('####eval####') + + # 第二阶段数据特征输出 + print('####eval####') + print("第二阶段涉及特征新增与类别新增!") + print('####eval####') + print('####eval####') + print("第二阶段数据:数据量:{},特征维度:{},类别数:{}".format(np.shape(data_s2)[0], np.shape(data_s2)[1], len(np.unique(Y_new)))) + print('####eval####') + + # 第二阶段模型训练与存储 + # 变体一 + alpha_best1, beta_best1 = validfunc(np.transpose(data_s2), label_s2, alpha_set, eta, "SAFC_D",np.mat(svm1.coef_)) + startours1 = time.time() + print('####eval####') + print("begin SAFC_D training!") + print('####eval####') + w_ours1 = SAFC_D(np.mat(svm1.coef_), np.transpose(data_s2), label_s2, alpha_best1, beta_best1, eta) + dump(w_ours1, save_dir+'/SAFC_D.model') + timeours1 = time.time() - startours1 + print('####eval####') + print(timeours1) + print('####eval####') + print('####eval####') + print("end SAFC_D training!") + print('####eval####') + + # 变体二 + alpha_best2,beta_best2=validfunc(np.transpose(data_s2),label_s2,alpha_set,eta,"SAFC_ID",np.mat(svm1.coef_)) + startours2=time.time() + print('####eval####') + print("begin SAFC_ID training!") + print('####eval####') + w_ours2 = SAFC_ID(np.mat(svm1.coef_),np.transpose(data_s2),label_s2,alpha_best2,beta_best2,eta) + dump(w_ours2, save_dir+'/SAFC_ID.model') + timeours2=time.time()-startours2 + print('####eval####') + print(timeours2) + print('####eval####') + print('####eval####') + print("end SAFC_ID training!") + print('####eval####') + + return # 其中,alpha_best1, beta_best1, alpha_best2, beta_best2, eta是超参数 ``` -最后,分别对两个模型进行性能测试: +最后,读取测试数据并完成测试与性能评估: ```python -# SAFC_D: -print('####eval####') -print("begin SAFC_D testing!") -print('####eval####') -pred1, acc_ours1, auc_ours1, f1wei_ours1, f1macro_ours1, f1micro_ours1 = Predict(w_ours1,np.transpose(test_data),test_label) -print('####eval####') -print("begin SAFC_D evaluation!") -print('####eval####') -Acc_ours1.append(acc_ours1) -AUC_ours1.append(auc_ours1) -F1_weight_ours1.append(f1wei_ours1) -F1_macro_ours1.append(f1macro_ours1) -F1_micro_ours1.append(f1micro_ours1) - -# SAFC_ID: -print('####eval####') -print("begin SAFC_ID testing!") -print('####eval####') -pred2,acc_ours2,auc_ours2,f1wei_ours2,f1macro_ours2,f1micro_ours2 = Predict(w_ours2,np.transpose(test_data),test_label) -print('####eval####') -print("begin SAFC_ID evaluation!") -print('####eval####') -Acc_ours2.append(acc_ours2) -AUC_ours2.append(auc_ours2) -F1_weight_ours2.append(f1wei_ours2) -F1_macro_ours2.append(f1macro_ours2) -F1_micro_ours2.append(f1micro_ours2) +def SAFC_test(test_path,w_ours1,w_ours2): + # Metrics + Acc_ours1, Acc_ours2= [], [] + AUC_ours1, AUC_ours2 = [], [] + F1_weight_ours1, F1_weight_ours2 = [], [] + F1_macro_ours1, F1_macro_ours2 = [], [] + F1_micro_ours1, F1_micro_ours2 = [], [] + + # 测试数据读取与评估 + print('####eval####') + print("开始读取测试数据!") + print('####eval####') + + X_test,Y_test=readtopython(test_path) + X_test,Y_test=datareconsrtuct(X_test,Y_test) + Y_test=Y_test+1 + + print('####eval####') + print("测试数据读取完成!") + print('####eval####') + + # Given and transform + test_data=X_test + test_label_vec=Y_test + test_label = ind2vec (Y_test.T, len(np.unique(Y_test))) + + # 归一化 + print('####eval####') + print("测试数据归一化!") + print('####eval####') + scaler = StandardScaler() + test_data = scaler.fit_transform(test_data) + print('####eval####') + print("测试数据归一化完成!") + print('####eval####') + + # 测试数据特征输出 + print('####eval####') + print("测试数据:数据量:{},特征维度:{},类别数:{}".format(np.shape(test_data)[0], np.shape(test_data)[1], len(np.unique(Y_test)))) + print('####eval####') + + # Evaluation + # 变体一 + print('####eval####') + print("begin SAFC_D testing!") + print('####eval####') + pred1, acc_ours1, auc_ours1, f1wei_ours1, f1macro_ours1, f1micro_ours1 = Predict(w_ours1,np.transpose(test_data),test_label) + print('####eval####') + print("begin SAFC_D evaluation!") + print('####eval####') + Acc_ours1.append(acc_ours1) + AUC_ours1.append(auc_ours1) + F1_weight_ours1.append(f1wei_ours1) + F1_macro_ours1.append(f1macro_ours1) + F1_micro_ours1.append(f1micro_ours1) + + # 变体二 + print('####eval####') + print("begin SAFC_ID testing!") + print('####eval####') + pred2,acc_ours2,auc_ours2,f1wei_ours2,f1macro_ours2,f1micro_ours2 = Predict(w_ours2,np.transpose(test_data),test_label) + print('####eval####') + print("begin SAFC_ID evaluation!") + print('####eval####') + Acc_ours2.append(acc_ours2) + AUC_ours2.append(auc_ours2) + F1_weight_ours2.append(f1wei_ours2) + F1_macro_ours2.append(f1macro_ours2) + F1_micro_ours2.append(f1micro_ours2) + + # 结果输出 + print('####eval####') + print("预测与评估完成,输出评估结果!") + print('####eval####') + + meanAcc_ours1 = np.mean(Acc_ours1) + meanAcc_ours2 = np.mean(Acc_ours2) + meanAuc_ours1 = np.mean(AUC_ours1) + meanAuc_ours2 = np.mean(AUC_ours2) + meanF1_macro_ours1 = np.mean(F1_macro_ours1) + meanF1_macro_ours2 = np.mean(F1_macro_ours2) + meanF1_weight_ours1 = np.mean(F1_weight_ours1) + meanF1_weight_ours2 = np.mean(F1_weight_ours2) + meanF1_micro_ours1 = np.mean(F1_micro_ours1) + meanF1_micro_ours2 = np.mean(F1_micro_ours2) + + print('####eval####') + print( + 'meanAcc_ours1{:.4f},meanAcc_ours2{:.4f},meanAuc_ours1{:.4f},meanAuc_ours2{:.4f},meanF1_macro_ours1{:.4f},meanF1_macro_ours2{:.4f},meanF1_weight_ours1{:.4f},meanF1_weight_ours2{:.4f},meanF1_micro_ours1{:.4f},meanF1_micro_ours2{:.4f}' + .format(meanAcc_ours1,meanAcc_ours2, meanAuc_ours1,meanAuc_ours2,meanF1_macro_ours1,meanF1_macro_ours2,meanF1_weight_ours1,meanF1_weight_ours2,meanF1_micro_ours1,meanF1_micro_ours2)) + print('####eval####') + print('Finished!') + + return ``` -- Gitee