diff --git a/README.md b/README.md index 437fb7e3850eda4d44c1f84f7ed5a633cb43833c..7ce63eb821b800a4ff41453f534847e0f8b9ab66 100644 --- a/README.md +++ b/README.md @@ -339,7 +339,10 @@ python StreamLearn/tests/test_ODS.py --data PATH_TO_DATA --checkpoint PATH_TO_CH - StreamLearn/Algorithm/Algorithm_SAFC/SAFC_Stream_Funcs.py -首先,获取算法所需数据集: +首先,根据代码配置文件StreamLearn/Algorithm/Algorithm_SAFC/request_import.py安装库完成环境配置 + + +然后,获取算法所需数据集: 地址: 通过百度网盘分享的文件:SAFC_datasets_CIFAR10.zip @@ -347,84 +350,300 @@ python StreamLearn/tests/test_ODS.py --data PATH_TO_DATA --checkpoint PATH_TO_CH 提取码:nudt 说明: -本实验需要用到的数据集是CIFAR10,本例将data_batch_1.mat至data_batch_4.mat中的数据作为第一阶段数据;data_batch_2.mat中的数据作为第二阶段的数据;test_batch.mat中的数据是测试数据 - +本实验需要用到的数据集是CIFAR10,将部分batch中的数据作为第一阶段数据;部分数据作为第二阶段的数据;test_batch.mat中的数据是测试数据 -然后,根据代码配置文件StreamLearn/Algorithm/Algorithm_SAFC/request_import.py安装库完成环境配置 +流式数据读取,传入数据地址: +```python +def __init__(self,args_address): + # 存储地址 + self.save_dir=args_address.save_dir + + # 读取地址 + self.PathSet=args_address.PathSet + self.new_path=args_address.new_path + self.test_path=args_address.test_path +``` 其次,调用SAFC算法进行训练: -SAFC训练基于第一阶段复用的SVM模型,因此先训练SVM,并保存训练好的SVM模型: -```python -print('####eval####') -print("begin svm1 training!") -print('####eval####') -svm1 = SVC(probability=True,kernel="linear",decision_function_shape='ovo') -svm1.fit(data_s1.tolist(), label_s1_vec.tolist()) -dump(svm1, save_dir+'/svm1.model') -print('####eval####') -print("end svm1 training!") -print('####eval####') - -# 复用SVM,训练SAFC_D,并保存训练后的模型: -print('####eval####') -print("begin SAFC_D training!") -print('####eval####') -w_ours1 = SAFC_D(np.mat(svm1._get_coef()), np.transpose(data_s2), label_s2, alpha_best1, beta_best1, eta) -dump(w_ours1, save_dir+'/SAFC_D.model') - -# 复用SVM,训练SAFC_ID,并保存训练后的模型: -print('####eval####') -print("begin SAFC_ID training!") -print('####eval####') -w_ours2 = SAFC_ID(np.mat(svm1._get_coef()),np.transpose(data_s2),label_s2,alpha_best2,beta_best2,eta) -dump(w_ours2, save_dir+'/SAFC_ID.model') +```python +def stream_fit(self): + # 第一阶段数据读取与训练 + SAFC_Stage1(self.PathSet,self.save_dir) + # 第一阶段模型提取 + print('####eval####') + print("Read stage1 model!") + print('####eval####') + stage1model=load(self.save_dir+'/svm1.model') + print('####eval####') + print("Have read stage1 model!") + print('####eval####') + + # 第二阶段数据读取与训练 + SAFC_Stage2(self.new_path,stage1model,self.save_dir) + # 第二阶段模型提取 + # 变体一 + print('####eval####') + print("Read stage2-SAFC_D model!") + print('####eval####') + stage2model1=load(self.save_dir+'/SAFC_D.model') + print('####eval####') + print("Have read stage2-SAFC_D model!") + print('####eval####') + # 变体二 + print('####eval####') + print("Read stage2-SAFC_D model!") + print('####eval####') + stage2model2=load(self.save_dir+'/SAFC_ID.model') + print('####eval####') + print("Have read stage2-SAFC_D model!") + print('####eval####') +``` + +具体而言,SAFC训练基于第一阶段复用的SVM模型,因此先训练SVM,并保存训练好的SVM模型: +```python +def SAFC_Stage1(PathSet,save_dir): + # 第一阶段数据读取与模型训练 + print('####eval####') + print("开始读取第一阶段数据!") + print('####eval####') + + X_past,Y_past=readbatchtoPython(PathSet) + X_past,Y_past=datareconsrtuct(X_past,Y_past) + Y_past=Y_past+1 + + print('####eval####') + print("第一阶段数据读取完成!") + print('####eval####') + + # Given and transform + data_s1=X_past + label_s1_vec=Y_past + label_s1 = ind2vec (Y_past.T, len(np.unique(Y_past))) + + # 归一化 + print('####eval####') + print("第一阶段数据归一化!") + print('####eval####') + scaler = StandardScaler() + data_s1 = scaler.fit_transform(data_s1) + print('####eval####') + print("第一阶段数据归一化完成!") + print('####eval####') + + # 第一阶段数据特征输出 + print('####eval####') + print("第一阶段数据:数据量:{},特征维度:{},类别数:{}".format(np.shape(data_s1)[0], np.shape(data_s1)[1], len(np.unique(Y_past)))) + print('####eval####') + + # 第一阶段模型训练与存储,用SVM训练第一阶段模型 + print('####eval####') + print("begin svm1 training!") + print('####eval####') + svm1 = LinearSVC() + svm1.fit(data_s1.tolist(), label_s1_vec.tolist()) + dump(svm1, save_dir+'/svm1.model') + print('####eval####') + print("end svm1 training!") + print('####eval####') + + return +``` + +复用SVM,在第二阶段训练SAFC_D和SAFC_ID两个变体,并保存训练后的模型: +```python +def SAFC_Stage2(new_path,svm1,save_dir): + # Paras + eta = 0.1 + alpha_set = [10 ** -2, 10 ** -1, 10 ** 0] + alpha = 0.001 + beta = 0.01 + + # 第二阶段数据读取与模型训练 + print('####eval####') + print("开始读取第二阶段数据!") + print('####eval####') + + X_new,Y_new=readtopython(new_path) + X_new,Y_new=datareconsrtuct(X_new,Y_new) + Y_new=Y_new+1 + + print('####eval####') + print("第二阶段数据读取完成!") + print('####eval####') + + # Given and transform + data_s2=X_new + label_s2_vec=Y_new + label_s2 = ind2vec (Y_new.T, len(np.unique(Y_new))) + + # 归一化 + print('####eval####') + print("第二阶段数据归一化!") + print('####eval####') + scaler = StandardScaler() + data_s2 = scaler.fit_transform(data_s2) + print('####eval####') + print("第二阶段数据归一化完成!") + print('####eval####') + + # 第二阶段数据特征输出 + print('####eval####') + print("第二阶段涉及特征新增与类别新增!") + print('####eval####') + print('####eval####') + print("第二阶段数据:数据量:{},特征维度:{},类别数:{}".format(np.shape(data_s2)[0], np.shape(data_s2)[1], len(np.unique(Y_new)))) + print('####eval####') + + # 第二阶段模型训练与存储 + # 变体一 + alpha_best1, beta_best1 = validfunc(np.transpose(data_s2), label_s2, alpha_set, eta, "SAFC_D",np.mat(svm1.coef_)) + startours1 = time.time() + print('####eval####') + print("begin SAFC_D training!") + print('####eval####') + w_ours1 = SAFC_D(np.mat(svm1.coef_), np.transpose(data_s2), label_s2, alpha_best1, beta_best1, eta) + dump(w_ours1, save_dir+'/SAFC_D.model') + timeours1 = time.time() - startours1 + print('####eval####') + print(timeours1) + print('####eval####') + print('####eval####') + print("end SAFC_D training!") + print('####eval####') + + # 变体二 + alpha_best2,beta_best2=validfunc(np.transpose(data_s2),label_s2,alpha_set,eta,"SAFC_ID",np.mat(svm1.coef_)) + startours2=time.time() + print('####eval####') + print("begin SAFC_ID training!") + print('####eval####') + w_ours2 = SAFC_ID(np.mat(svm1.coef_),np.transpose(data_s2),label_s2,alpha_best2,beta_best2,eta) + dump(w_ours2, save_dir+'/SAFC_ID.model') + timeours2=time.time()-startours2 + print('####eval####') + print(timeours2) + print('####eval####') + print('####eval####') + print("end SAFC_ID training!") + print('####eval####') + + return # 其中,alpha_best1, beta_best1, alpha_best2, beta_best2, eta是超参数 ``` -最后,分别对两个模型进行性能测试: +最后,读取测试数据并完成测试与性能评估: ```python -# SAFC_D: -print('####eval####') -print("begin SAFC_D testing!") -print('####eval####') -pred1, acc_ours1, auc_ours1, f1wei_ours1, f1macro_ours1, f1micro_ours1 = Predict(w_ours1,np.transpose(test_data),test_label) -print('####eval####') -print("begin SAFC_D evaluation!") -print('####eval####') -Acc_ours1.append(acc_ours1) -AUC_ours1.append(auc_ours1) -F1_weight_ours1.append(f1wei_ours1) -F1_macro_ours1.append(f1macro_ours1) -F1_micro_ours1.append(f1micro_ours1) - -# SAFC_ID: -print('####eval####') -print("begin SAFC_ID testing!") -print('####eval####') -pred2,acc_ours2,auc_ours2,f1wei_ours2,f1macro_ours2,f1micro_ours2 = Predict(w_ours2,np.transpose(test_data),test_label) -print('####eval####') -print("begin SAFC_ID evaluation!") -print('####eval####') -Acc_ours2.append(acc_ours2) -AUC_ours2.append(auc_ours2) -F1_weight_ours2.append(f1wei_ours2) -F1_macro_ours2.append(f1macro_ours2) -F1_micro_ours2.append(f1micro_ours2) +def stream_evaluate(self): + # 测试数据读取与评估 + SAFC_test(self.test_path,stage2model1,stage2model2) ``` - -在test文件中,按照不同的数据流场景,提供两个main文件 -- StreamLearn/tests/tests_SAFC/CIFAR10_SAFC_DandSAFC_ID_allFunc_main.py -对应一次性读取两阶段数据并完成测试 -- StreamLearn/tests/tests_SAFC/CIFAR10_SAFC_DandSAFC_ID_useFunc_main.py -对应分批次读取两阶段数据,存储模型,读取测试数据,调用模型并完成测试 +具体而言, +```python +def SAFC_test(test_path,w_ours1,w_ours2): + # Metrics + Acc_ours1, Acc_ours2= [], [] + AUC_ours1, AUC_ours2 = [], [] + F1_weight_ours1, F1_weight_ours2 = [], [] + F1_macro_ours1, F1_macro_ours2 = [], [] + F1_micro_ours1, F1_micro_ours2 = [], [] + + # 测试数据读取与评估 + print('####eval####') + print("开始读取测试数据!") + print('####eval####') + + X_test,Y_test=readtopython(test_path) + X_test,Y_test=datareconsrtuct(X_test,Y_test) + Y_test=Y_test+1 + + print('####eval####') + print("测试数据读取完成!") + print('####eval####') + + # Given and transform + test_data=X_test + test_label_vec=Y_test + test_label = ind2vec (Y_test.T, len(np.unique(Y_test))) + + # 归一化 + print('####eval####') + print("测试数据归一化!") + print('####eval####') + scaler = StandardScaler() + test_data = scaler.fit_transform(test_data) + print('####eval####') + print("测试数据归一化完成!") + print('####eval####') + + # 测试数据特征输出 + print('####eval####') + print("测试数据:数据量:{},特征维度:{},类别数:{}".format(np.shape(test_data)[0], np.shape(test_data)[1], len(np.unique(Y_test)))) + print('####eval####') + + # Evaluation + # 变体一 + print('####eval####') + print("begin SAFC_D testing!") + print('####eval####') + pred1, acc_ours1, auc_ours1, f1wei_ours1, f1macro_ours1, f1micro_ours1 = Predict(w_ours1,np.transpose(test_data),test_label) + print('####eval####') + print("begin SAFC_D evaluation!") + print('####eval####') + Acc_ours1.append(acc_ours1) + AUC_ours1.append(auc_ours1) + F1_weight_ours1.append(f1wei_ours1) + F1_macro_ours1.append(f1macro_ours1) + F1_micro_ours1.append(f1micro_ours1) + + # 变体二 + print('####eval####') + print("begin SAFC_ID testing!") + print('####eval####') + pred2,acc_ours2,auc_ours2,f1wei_ours2,f1macro_ours2,f1micro_ours2 = Predict(w_ours2,np.transpose(test_data),test_label) + print('####eval####') + print("begin SAFC_ID evaluation!") + print('####eval####') + Acc_ours2.append(acc_ours2) + AUC_ours2.append(auc_ours2) + F1_weight_ours2.append(f1wei_ours2) + F1_macro_ours2.append(f1macro_ours2) + F1_micro_ours2.append(f1micro_ours2) + + # 结果输出 + print('####eval####') + print("预测与评估完成,输出评估结果!") + print('####eval####') + + meanAcc_ours1 = np.mean(Acc_ours1) + meanAcc_ours2 = np.mean(Acc_ours2) + meanAuc_ours1 = np.mean(AUC_ours1) + meanAuc_ours2 = np.mean(AUC_ours2) + meanF1_macro_ours1 = np.mean(F1_macro_ours1) + meanF1_macro_ours2 = np.mean(F1_macro_ours2) + meanF1_weight_ours1 = np.mean(F1_weight_ours1) + meanF1_weight_ours2 = np.mean(F1_weight_ours2) + meanF1_micro_ours1 = np.mean(F1_micro_ours1) + meanF1_micro_ours2 = np.mean(F1_micro_ours2) + + print('####eval####') + print( + 'meanAcc_ours1{:.4f},meanAcc_ours2{:.4f},meanAuc_ours1{:.4f},meanAuc_ours2{:.4f},meanF1_macro_ours1{:.4f},meanF1_macro_ours2{:.4f},meanF1_weight_ours1{:.4f},meanF1_weight_ours2{:.4f},meanF1_micro_ours1{:.4f},meanF1_micro_ours2{:.4f}' + .format(meanAcc_ours1,meanAcc_ours2, meanAuc_ours1,meanAuc_ours2,meanF1_macro_ours1,meanF1_macro_ours2,meanF1_weight_ours1,meanF1_weight_ours2,meanF1_micro_ours1,meanF1_micro_ours2)) + print('####eval####') + print('Finished!') + + return +``` ModelsIN用于存放训练获得的分类器 +测试主文件为test_SAFC.py + ## 课题四 diff --git a/StreamLearn/Algorithm/Algorithm_SAFC/Class_CIFAR10_SAFC_DandSAFC_ID.py b/StreamLearn/Algorithm/Algorithm_SAFC/Class_CIFAR10_SAFC_DandSAFC_ID.py new file mode 100644 index 0000000000000000000000000000000000000000..0bf55c0025583557cadaf24c099cfaf5ce1eea04 --- /dev/null +++ b/StreamLearn/Algorithm/Algorithm_SAFC/Class_CIFAR10_SAFC_DandSAFC_ID.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- +""" +Created on Sat Sep 14 2024 + +@author: zhangxinyue +""" + +import time + +import numpy as np +import math + +from sklearn.preprocessing import StandardScaler +from sklearn import model_selection +from sklearn.metrics import * + +from sklearn import svm +from sklearn.svm import SVC +from sklearn.svm import LinearSVC + +import scipy.io as io +from scipy.io import loadmat +import os +from joblib import dump,load + +from types import SimpleNamespace + +#Ours +from StreamLearn.Algorithm.Algorithm_SAFC.compared_method import * +from StreamLearn.Algorithm.Algorithm_SAFC.predict import * +from StreamLearn.Algorithm.Algorithm_SAFC.SAFC import * +from StreamLearn.Algorithm.Algorithm_SAFC.MetricsPred import * +from StreamLearn.Algorithm.Algorithm_SAFC.validfunc_both import * +from StreamLearn.Algorithm.Algorithm_SAFC.Read_to_Python import * +from StreamLearn.Algorithm.Algorithm_SAFC.SAFC_Stream_Funcs import * + +class SAFC_achieve(): + def __init__(self,args_address): + # %% 第一阶段、第二阶段、测试main函数 + # 存储地址 + self.save_dir=args_address.save_dir + + # 读取地址 + self.PathSet=args_address.PathSet + self.new_path=args_address.new_path + self.test_path=args_address.test_path + + def stream_fit(self): + # 第一阶段数据读取与训练 + SAFC_Stage1(self.PathSet,self.save_dir) + # 第一阶段模型提取 + print('####eval####') + print("Read stage1 model!") + print('####eval####') + stage1model=load(self.save_dir+'/svm1.model') + print('####eval####') + print("Have read stage1 model!") + print('####eval####') + + # 第二阶段数据读取与训练 + SAFC_Stage2(self.new_path,stage1model,self.save_dir) + # 第二阶段模型提取 + # 变体一 + print('####eval####') + print("Read stage2-SAFC_D model!") + print('####eval####') + stage2model1=load(self.save_dir+'/SAFC_D.model') + print('####eval####') + print("Have read stage2-SAFC_D model!") + print('####eval####') + # 变体二 + print('####eval####') + print("Read stage2-SAFC_D model!") + print('####eval####') + stage2model2=load(self.save_dir+'/SAFC_ID.model') + print('####eval####') + print("Have read stage2-SAFC_D model!") + print('####eval####') + + def stream_evaluate(self): + # 测试数据读取与评估 + SAFC_test(self.test_path,stage2model1,stage2model2) + diff --git a/StreamLearn/tests/tests_SAFC/ModelsIN/.keep b/StreamLearn/Algorithm/Algorithm_SAFC/ModelsIN/.keep similarity index 100% rename from StreamLearn/tests/tests_SAFC/ModelsIN/.keep rename to StreamLearn/Algorithm/Algorithm_SAFC/ModelsIN/.keep diff --git a/StreamLearn/Algorithm/Algorithm_SAFC/Read_to_Python.py b/StreamLearn/Algorithm/Algorithm_SAFC/Read_to_Python.py index c8486ba84ad3b782e7aa885fcafb5c360f29cfae..49ba73d973e511232a7f64d4cef2a75ee5378dd4 100644 --- a/StreamLearn/Algorithm/Algorithm_SAFC/Read_to_Python.py +++ b/StreamLearn/Algorithm/Algorithm_SAFC/Read_to_Python.py @@ -21,7 +21,8 @@ def readtopython(path): return X_part,Y_part # Batch数据接口 -def readbatchtoPython(Path): +# Stage 1 +def readbatchtoPython_Past(Path): for path_index in range(len(Path)): path=Path[path_index] X_part,Y_part=readtopython(path) @@ -38,4 +39,19 @@ def readbatchtoPython(Path): X_past=X_past_original[Position_past,0:2048] return X_past,Y_past +# Stage 2 +def readbatchtoPython_New(Path): + for path_index in range(len(Path)): + path=Path[path_index] + X_part,Y_part=readtopython(path) + if path_index==0: + X_new_original=X_part + Y_new_original=Y_part + if path_index!=0: + X_new_original=np.vstack((X_new_original,X_part)) + Y_new_original=np.vstack((Y_new_original,Y_part)) + X_new=X_new_original + Y_new=Y_new_original + return X_new,Y_new + diff --git a/StreamLearn/Algorithm/Algorithm_SAFC/SAFC_Stream_Funcs.py b/StreamLearn/Algorithm/Algorithm_SAFC/SAFC_Stream_Funcs.py index 343167d277f31ab48bdc74f909b7202c146a2858..4f7f4b4be787f58b1989b2aa37b6d371de4c8501 100644 --- a/StreamLearn/Algorithm/Algorithm_SAFC/SAFC_Stream_Funcs.py +++ b/StreamLearn/Algorithm/Algorithm_SAFC/SAFC_Stream_Funcs.py @@ -14,14 +14,6 @@ from sklearn.preprocessing import StandardScaler from sklearn import model_selection from sklearn.metrics import * -from compared_method import * -from predict import * -from SAFC import * -from MetricsPred import * -from validfunc_both import * -from Read_to_Python import * -from SAFC_Stream_Funcs import * - from sklearn import svm from sklearn.svm import SVC from sklearn.svm import LinearSVC @@ -31,6 +23,16 @@ from scipy.io import loadmat import os from joblib import dump,load +from types import SimpleNamespace + +#Ours +from StreamLearn.Algorithm.Algorithm_SAFC.compared_method import * +from StreamLearn.Algorithm.Algorithm_SAFC.predict import * +from StreamLearn.Algorithm.Algorithm_SAFC.SAFC import * +from StreamLearn.Algorithm.Algorithm_SAFC.MetricsPred import * +from StreamLearn.Algorithm.Algorithm_SAFC.validfunc_both import * +from StreamLearn.Algorithm.Algorithm_SAFC.Read_to_Python import * + # %% Adress and Pre-define # 定义一个稀疏矩阵的方法 def ind2vec(indices, num_classes): @@ -56,7 +58,7 @@ def SAFC_Stage1(PathSet,save_dir): print("开始读取第一阶段数据!") print('####eval####') - X_past,Y_past=readbatchtoPython(PathSet) + X_past,Y_past=readbatchtoPython_Past(PathSet) X_past,Y_past=datareconsrtuct(X_past,Y_past) Y_past=Y_past+1 @@ -110,7 +112,7 @@ def SAFC_Stage2(new_path,svm1,save_dir): print("开始读取第二阶段数据!") print('####eval####') - X_new,Y_new=readtopython(new_path) + X_new,Y_new=readbatchtoPython_New(new_path) X_new,Y_new=datareconsrtuct(X_new,Y_new) Y_new=Y_new+1 diff --git a/StreamLearn/Algorithm/Algorithm_SAFC/request_import.py b/StreamLearn/Algorithm/Algorithm_SAFC/request_import.py index 071fdbad0102cf41a4e0b0e38a72938ea8786474..5ac7cc99dddb617ce6ac6d3c75d9095e28f44b59 100644 --- a/StreamLearn/Algorithm/Algorithm_SAFC/request_import.py +++ b/StreamLearn/Algorithm/Algorithm_SAFC/request_import.py @@ -15,14 +15,6 @@ from sklearn.preprocessing import StandardScaler from sklearn import model_selection from sklearn.metrics import * -from compared_method import * -from predict import * -from SAFC import * -from MetricsPred import * -from validfunc_both import * -from Read_to_Python import * -from SAFC_Stream_Funcs import * - from sklearn import svm from sklearn.svm import SVC from sklearn.svm import LinearSVC @@ -30,4 +22,16 @@ from sklearn.svm import LinearSVC import scipy.io as io from scipy.io import loadmat import os -from joblib import dump,load \ No newline at end of file +from joblib import dump,load + +from types import SimpleNamespace + +#Ours +from StreamLearn.Algorithm.Algorithm_SAFC.compared_method import * +from StreamLearn.Algorithm.Algorithm_SAFC.predict import * +from StreamLearn.Algorithm.Algorithm_SAFC.SAFC import * +from StreamLearn.Algorithm.Algorithm_SAFC.MetricsPred import * +from StreamLearn.Algorithm.Algorithm_SAFC.validfunc_both import * +from StreamLearn.Algorithm.Algorithm_SAFC.Read_to_Python import * +from StreamLearn.Algorithm.Algorithm_SAFC.SAFC_Stream_Funcs import * +from StreamLearn.Algorithm.Algorithm_SAFC.Class_CIFAR10_SAFC_DandSAFC_ID import * \ No newline at end of file diff --git a/StreamLearn/tests/test_SAFC.py b/StreamLearn/tests/test_SAFC.py new file mode 100644 index 0000000000000000000000000000000000000000..916ff878961d01b43a9e78e0c6bd802df8990c1c --- /dev/null +++ b/StreamLearn/tests/test_SAFC.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- +""" +Created on Fri Nov 22 20:02:07 2024 + +@author: zhangxinyue +""" + +import time + +import numpy as np +import math + +from sklearn.preprocessing import StandardScaler +from sklearn import model_selection +from sklearn.metrics import * + +from sklearn import svm +from sklearn.svm import SVC +from sklearn.svm import LinearSVC + +import scipy.io as io +from scipy.io import loadmat +import os +from joblib import dump,load + +from types import SimpleNamespace + +#Ours +from StreamLearn.Algorithm.Algorithm_SAFC.compared_method import * +from StreamLearn.Algorithm.Algorithm_SAFC.predict import * +from StreamLearn.Algorithm.Algorithm_SAFC.SAFC import * +from StreamLearn.Algorithm.Algorithm_SAFC.MetricsPred import * +from StreamLearn.Algorithm.Algorithm_SAFC.validfunc_both import * +from StreamLearn.Algorithm.Algorithm_SAFC.Read_to_Python import * +from StreamLearn.Algorithm.Algorithm_SAFC.SAFC_Stream_Funcs import * +from StreamLearn.Algorithm.Algorithm_SAFC.Class_CIFAR10_SAFC_DandSAFC_ID import * + +#%% +def train_and_evaluate_stream_SAFC(args_address,runT): + alg_SAFC=SAFC_achieve(args_address) + for i in range(runT): + alg_SAFC.stream_fit() + alg_SAFC.stream_evaluate() + return + +def main(): + # 地址参数定义 + data_batch_dir="Dataset//" + args_address=SimpleNamespace( + save_dir="ModelsIN", + PathSet=[data_batch_dir+"data_batch_1.mat",data_batch_dir+"data_batch_2.mat",data_batch_dir+"data_batch_3.mat",data_batch_dir+"data_batch_4.mat"], + new_path=[data_batch_dir+"data_batch_5.mat"], + test_path=data_batch_dir+"test_batch.mat") + # 运行次数定义 + runT=3 + # 运行 + train_and_evaluate_stream_SAFC(args_address,runT) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/StreamLearn/tests/tests_SAFC/CIFAR10_SAFC_DandSAFC_ID_allFunc_main.py b/StreamLearn/tests/tests_SAFC/CIFAR10_SAFC_DandSAFC_ID_allFunc_main.py deleted file mode 100644 index 4129044407289043f1d9ab7201872efa7290a7b2..0000000000000000000000000000000000000000 --- a/StreamLearn/tests/tests_SAFC/CIFAR10_SAFC_DandSAFC_ID_allFunc_main.py +++ /dev/null @@ -1,279 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -import time - -import numpy as np -import math - -from sklearn.preprocessing import StandardScaler -from sklearn import model_selection -from sklearn.metrics import * - -from compared_method import * -from predict import * -from SAFC import * -from MetricsPred import * -from validfunc_both import * -from Read_to_Python import * -from SAFC_Stream_Funcs import * - -from sklearn import svm -from sklearn.svm import SVC -from sklearn.svm import LinearSVC - -import scipy.io as io -from scipy.io import loadmat -import os -from joblib import dump,load - -# %% Pre-define -# 定义一个稀疏矩阵的方法 -def ind2vec(indices, num_classes): - indices = np.asarray(indices) - out = np.zeros((num_classes, np.shape(indices)[1])) - for i in range(num_classes): - index3 = np.where(i + 1 == indices)[1] - out[i, index3] = 1 - return out - -def datareconsrtuct(X,Y): - #从大到小排列Y - sorted_index=np.argsort(Y, axis=None) - newY=Y[sorted_index] - #相应替换X的位置 - newX=X[sorted_index] - return newX,newY - -# %% -def SAFC_DandSAFC_ID_streamdata_all(PathSet,new_path,test_path): - # 存储地址 - save_dir="ModelsIN" - - # %%Parameters and given values - # Metrics - Acc_ours1, Acc_ours2= [], [] - AUC_ours1, AUC_ours2 = [], [] - F1_weight_ours1, F1_weight_ours2 = [], [] - F1_macro_ours1, F1_macro_ours2 = [], [] - F1_micro_ours1, F1_micro_ours2 = [], [] - - # Paras - eta = 0.1 - alpha_set = [10 ** -2, 10 ** -1, 10 ** 0] - alpha = 0.001 - beta = 0.01 - - # %% 第一阶段数据读取与模型训练 - print('####eval####') - print("开始读取第一阶段数据!") - print('####eval####') - - X_past,Y_past=readbatchtoPython(PathSet) - # datareconstruction - X_past,Y_past=datareconsrtuct(X_past,Y_past) - Y_past=Y_past+1 - - print('####eval####') - print("第一阶段数据读取完成!") - print('####eval####') - - # Given and transform - data_s1=X_past - label_s1_vec=Y_past - label_s1 = ind2vec (Y_past.T, len(np.unique(Y_past))) - - # 归一化 - print('####eval####') - print("第一阶段数据归一化!") - print('####eval####') - scaler = StandardScaler() - data_s1 = scaler.fit_transform(data_s1) - print('####eval####') - print("第一阶段数据归一化完成!") - print('####eval####') - - # 第一阶段数据特征输出 - print('####eval####') - print("第一阶段数据:数据量:{},特征维度:{},类别数:{}".format(np.shape(data_s1)[0], np.shape(data_s1)[1], len(np.unique(Y_past)))) - print('####eval####') - - # 第一阶段模型训练与存储,用SVM训练第一阶段模型 - print('####eval####') - print("begin svm1 training!") - print('####eval####') - svm1 = LinearSVC() - svm1.fit(data_s1.tolist(), label_s1_vec.tolist()) - dump(svm1, save_dir+'/svm1.model') - print('####eval####') - print("end svm1 training!") - print('####eval####') - - # %% 第二阶段数据读取与模型训练 - print('####eval####') - print("开始读取第二阶段数据!") - print('####eval####') - - X_new,Y_new=readtopython(new_path) - X_new,Y_new=datareconsrtuct(X_new,Y_new) - Y_new=Y_new+1 - - print('####eval####') - print("第二阶段数据读取完成!") - print('####eval####') - - # Given and transform - data_s2=X_new - label_s2_vec=Y_new - label_s2 = ind2vec (Y_new.T, len(np.unique(Y_new))) - - # 归一化 - print('####eval####') - print("第二阶段数据归一化!") - print('####eval####') - scaler = StandardScaler() - data_s2 = scaler.fit_transform(data_s2) - print('####eval####') - print("第二阶段数据归一化完成!") - print('####eval####') - - # 第二阶段数据特征输出 - print('####eval####') - print("第二阶段涉及特征新增与类别新增!") - print('####eval####') - print('####eval####') - print("第二阶段数据:数据量:{},特征维度:{},类别数:{}".format(np.shape(data_s2)[0], np.shape(data_s2)[1], len(np.unique(Y_new)))) - print('####eval####') - - # 第二阶段模型训练与存储 - # 变体一 - alpha_best1, beta_best1 = validfunc(np.transpose(data_s2), label_s2, alpha_set, eta, "SAFC_D",np.mat(svm1.coef_)) - startours1 = time.time() - print('####eval####') - print("begin SAFC_D training!") - print('####eval####') - w_ours1 = SAFC_D(np.mat(svm1.coef_), np.transpose(data_s2), label_s2, alpha_best1, beta_best1, eta) - dump(w_ours1, save_dir+'/SAFC_D.model') - timeours1 = time.time() - startours1 - print('####eval####') - print(timeours1) - print('####eval####') - print('####eval####') - print("end SAFC_D training!") - print('####eval####') - - # 变体二 - alpha_best2,beta_best2=validfunc(np.transpose(data_s2),label_s2,alpha_set,eta,"SAFC_ID",np.mat(svm1.coef_)) - startours2=time.time() - print('####eval####') - print("begin SAFC_ID training!") - print('####eval####') - w_ours2 = SAFC_ID(np.mat(svm1.coef_),np.transpose(data_s2),label_s2,alpha_best2,beta_best2,eta) - dump(w_ours2, save_dir+'/SAFC_ID.model') - timeours2=time.time()-startours2 - print('####eval####') - print(timeours2) - print('####eval####') - print('####eval####') - print("end SAFC_ID training!") - print('####eval####') - - # %%测试数据读取与评估 - print('####eval####') - print("开始读取测试数据!") - print('####eval####') - - X_test,Y_test=readtopython(test_path) - X_test,Y_test=datareconsrtuct(X_test,Y_test) - Y_test=Y_test+1 - - print('####eval####') - print("测试数据读取完成!") - print('####eval####') - - # Given and transform - test_data=X_test - test_label_vec=Y_test - test_label = ind2vec (Y_test.T, len(np.unique(Y_test))) - - # 归一化 - print('####eval####') - print("测试数据归一化!") - print('####eval####') - scaler = StandardScaler() - test_data = scaler.fit_transform(test_data) - print('####eval####') - print("测试数据归一化完成!") - print('####eval####') - - # 测试数据特征输出 - print('####eval####') - print("测试数据:数据量:{},特征维度:{},类别数:{}".format(np.shape(test_data)[0], np.shape(test_data)[1], len(np.unique(Y_test)))) - print('####eval####') - - # Evaluation - # 变体一 - print('####eval####') - print("begin SAFC_D testing!") - print('####eval####') - pred1, acc_ours1, auc_ours1, f1wei_ours1, f1macro_ours1, f1micro_ours1 = Predict(w_ours1,np.transpose(test_data),test_label) - print('####eval####') - print("begin SAFC_D evaluation!") - print('####eval####') - Acc_ours1.append(acc_ours1) - AUC_ours1.append(auc_ours1) - F1_weight_ours1.append(f1wei_ours1) - F1_macro_ours1.append(f1macro_ours1) - F1_micro_ours1.append(f1micro_ours1) - - # 变体二 - print('####eval####') - print("begin SAFC_ID testing!") - print('####eval####') - pred2,acc_ours2,auc_ours2,f1wei_ours2,f1macro_ours2,f1micro_ours2 = Predict(w_ours2,np.transpose(test_data),test_label) - print('####eval####') - print("begin SAFC_ID evaluation!") - print('####eval####') - Acc_ours2.append(acc_ours2) - AUC_ours2.append(auc_ours2) - F1_weight_ours2.append(f1wei_ours2) - F1_macro_ours2.append(f1macro_ours2) - F1_micro_ours2.append(f1micro_ours2) - - # 结果输出 - print('####eval####') - print("预测与评估完成,输出评估结果!") - print('####eval####') - - meanAcc_ours1 = np.mean(Acc_ours1) - meanAcc_ours2 = np.mean(Acc_ours2) - meanAuc_ours1 = np.mean(AUC_ours1) - meanAuc_ours2 = np.mean(AUC_ours2) - meanF1_macro_ours1 = np.mean(F1_macro_ours1) - meanF1_macro_ours2 = np.mean(F1_macro_ours2) - meanF1_weight_ours1 = np.mean(F1_weight_ours1) - meanF1_weight_ours2 = np.mean(F1_weight_ours2) - meanF1_micro_ours1 = np.mean(F1_micro_ours1) - meanF1_micro_ours2 = np.mean(F1_micro_ours2) - - print('####eval####') - print( - 'meanAcc_ours1{:.4f},meanAcc_ours2{:.4f},meanAuc_ours1{:.4f},meanAuc_ours2{:.4f},meanF1_macro_ours1{:.4f},meanF1_macro_ours2{:.4f},meanF1_weight_ours1{:.4f},meanF1_weight_ours2{:.4f},meanF1_micro_ours1{:.4f},meanF1_micro_ours2{:.4f}' - .format(meanAcc_ours1,meanAcc_ours2, meanAuc_ours1,meanAuc_ours2,meanF1_macro_ours1,meanF1_macro_ours2,meanF1_weight_ours1,meanF1_weight_ours2,meanF1_micro_ours1,meanF1_micro_ours2)) - print('####eval####') - print('Finished!') - - return -# %% 主程序调用区 -if __name__ == "__main__": - # 读取地址 - data_batch_dir="Dataset//" - PathSet=[data_batch_dir+"data_batch_1.mat",data_batch_dir+"data_batch_2.mat",data_batch_dir+"data_batch_3.mat",data_batch_dir+"data_batch_4.mat"] - new_path=data_batch_dir+"data_batch_5.mat" - test_path=data_batch_dir+"test_batch.mat" - - SAFC_DandSAFC_ID_streamdata_all(PathSet,new_path,test_path) \ No newline at end of file diff --git a/StreamLearn/tests/tests_SAFC/CIFAR10_SAFC_DandSAFC_ID_useFunc_main.py b/StreamLearn/tests/tests_SAFC/CIFAR10_SAFC_DandSAFC_ID_useFunc_main.py deleted file mode 100644 index 6f2ee543d8c0fa3a6d7572c3be5268bbae1b7c55..0000000000000000000000000000000000000000 --- a/StreamLearn/tests/tests_SAFC/CIFAR10_SAFC_DandSAFC_ID_useFunc_main.py +++ /dev/null @@ -1,77 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -import time - -import numpy as np -import math - -from sklearn.preprocessing import StandardScaler -from sklearn import model_selection -from sklearn.metrics import * - -from compared_method import * -from predict import * -from SAFC import * -from MetricsPred import * -from validfunc_both import * -from Read_to_Python import * -from SAFC_Stream_Funcs import * - -from sklearn import svm -from sklearn.svm import SVC -from sklearn.svm import LinearSVC - -import scipy.io as io -from scipy.io import loadmat -import os -from joblib import dump,load - -# %% 第一阶段、第二阶段、测试main函数 -# 存储地址 -save_dir="ModelsIN" - -# 读取地址 -data_batch_dir="Dataset//" -PathSet=[data_batch_dir+"data_batch_1.mat",data_batch_dir+"data_batch_2.mat",data_batch_dir+"data_batch_3.mat",data_batch_dir+"data_batch_4.mat"] -new_path=data_batch_dir+"data_batch_5.mat" -test_path=data_batch_dir+"test_batch.mat" - -# 第一阶段数据读取与训练 -SAFC_Stage1(PathSet,save_dir) -# 第一阶段模型提取 -print('####eval####') -print("Read stage1 model!") -print('####eval####') -stage1model=load(save_dir+'/svm1.model') -print('####eval####') -print("Have read stage1 model!") -print('####eval####') - -# 第二阶段数据读取与训练 -SAFC_Stage2(new_path,stage1model,save_dir) -# 第二阶段模型提取 -# 变体一 -print('####eval####') -print("Read stage2-SAFC_D model!") -print('####eval####') -stage2model1=load(save_dir+'/SAFC_D.model') -print('####eval####') -print("Have read stage2-SAFC_D model!") -print('####eval####') -# 变体二 -print('####eval####') -print("Read stage2-SAFC_D model!") -print('####eval####') -stage2model2=load(save_dir+'/SAFC_ID.model') -print('####eval####') -print("Have read stage2-SAFC_D model!") -print('####eval####') - -# 测试数据读取与评估 -SAFC_test(test_path,stage2model1,stage2model2) - diff --git a/StreamLearn/tests/tests_SAFC/Dataset/dataset_information b/StreamLearn/tests/tests_SAFC/Dataset/dataset_information deleted file mode 100644 index d04db552d4f06fe538020dcfb5ea4f1e60365274..0000000000000000000000000000000000000000 --- a/StreamLearn/tests/tests_SAFC/Dataset/dataset_information +++ /dev/null @@ -1,4 +0,0 @@ -数据集 -通过百度网盘分享的文件:SAFC_datasets_CIFAR10.zip -链接:https://pan.baidu.com/s/1xtZjSxIIEMnUwoM7VXCzkQ -提取码:nudt \ No newline at end of file diff --git a/StreamLearn/tests/tests_SAFC/MetricsPred.py b/StreamLearn/tests/tests_SAFC/MetricsPred.py deleted file mode 100644 index 84c63597090711c9be48bfa3edeb846ad70d5cb5..0000000000000000000000000000000000000000 --- a/StreamLearn/tests/tests_SAFC/MetricsPred.py +++ /dev/null @@ -1,19 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -import numpy as np -from sklearn.metrics import * - -# %% -def MetricsPred(test_label,test_label_mat,pred,prob): - - acc = np.mean(test_label.flatten()== pred.flatten()) - AUC=roc_auc_score(test_label_mat.tolist(),prob.tolist()) - F1_weight=f1_score(test_label.tolist(),pred.tolist(), average='weighted') - F1_macro=f1_score(test_label.tolist(),pred.tolist(), average='macro') - F1_micro=f1_score(test_label.tolist(),pred.tolist(), average='micro') - return acc,AUC,F1_weight,F1_macro,F1_micro \ No newline at end of file diff --git a/StreamLearn/tests/tests_SAFC/Read_to_Python.py b/StreamLearn/tests/tests_SAFC/Read_to_Python.py deleted file mode 100644 index c8486ba84ad3b782e7aa885fcafb5c360f29cfae..0000000000000000000000000000000000000000 --- a/StreamLearn/tests/tests_SAFC/Read_to_Python.py +++ /dev/null @@ -1,41 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -import numpy as np -from scipy.io import loadmat - -# %% -# tips -# import os -# print(f"当前工作目录: {os.getcwd()}") - -# 数据接口 -def readtopython(path): - data_part=loadmat(path) - X_part=data_part["data"].astype(np.float64) - Y_part=data_part["labels"].astype(np.float64) - return X_part,Y_part - -# Batch数据接口 -def readbatchtoPython(Path): - for path_index in range(len(Path)): - path=Path[path_index] - X_part,Y_part=readtopython(path) - if path_index==0: - X_past_original=X_part - Y_past_original=Y_part - if path_index!=0: - X_past_original=np.vstack((X_past_original,X_part)) - Y_past_original=np.vstack((Y_past_original,Y_part)) - # 第一阶段的数据没有No.9,即第10类数据 - Position_past=np.where(Y_past_original!=9)[0] - Y_past=Y_past_original[Position_past] - # 特征不含B通道的1024维特征 - X_past=X_past_original[Position_past,0:2048] - return X_past,Y_past - - diff --git a/StreamLearn/tests/tests_SAFC/SAFC.py b/StreamLearn/tests/tests_SAFC/SAFC.py deleted file mode 100644 index dbf74f489e854d7f5e4a8edc0dd816903c48319e..0000000000000000000000000000000000000000 --- a/StreamLearn/tests/tests_SAFC/SAFC.py +++ /dev/null @@ -1,165 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -import numpy as np -import math - -#%% slove Memory Error key -#redefine X_21@D: -def AxdiagB(A,B): - #A=d*N,B=1*N - BL=B.tolist()[0] - C=[] - for i in range(np.shape(A)[0]): - C_row=[] - for j in range(np.shape(A)[1]): - ele=BL[j]*A[i,j] - C_row.append(ele) - C.append(C_row) - C_mat=np.mat(C) - return C_mat -#%% -# 函数1、SAFC_D -def SAFC_D(W1,train_data,train_label,alpha,beta,eta): -##%% Input: -#% W1: c*d1, a model learned in previous stage, and it will be reused in -#% current stage. W1 can be learned by using 'mysoftmax' function. -#% c and d1 are the number of classes and the dimension of data in -#% previous stage, respectively. -#% train_data: d2*n, where d2 is the dimension of data in current stage, d2 > d1. -#% n is the number of data in current stage. -#% train_label: (c+1)*n, each column is a one-hot vector. -#% alpha and beta: the hyperparameters in the model, alpha > 0, beta >0. -#% eta: the step size in the gradient descent algorithm. -##%% Output: -#% softmaxModel: (c+1)*d, the learned classifier. -##%% Our proposed method - cost_old = 0 #%set old cost and new cost value - cost = 1 - object_value = [] #% record the cost at each update iteration - count = 0 #% count the running number - nCla1,nFea1 = np.shape(W1) - nCla2,nSam2 = np.shape(train_label) - nFea2 = np.shape(train_data)[0] - X_21 = train_data[0:nFea1,:] - X_22 = train_data[nFea1:,:] - W2 = 0.005*np.ones((nCla2,nFea2)) #% Initialise classifier W - #%W2= mysoftmax(data_s2,label_s2,alpha_set, eta)[0] - W21 = W2[:,0:nFea1] - W22 = W2[:,nFea1:] - W21_tilde = W21[0:nCla1,:] - W21_hat = W21[nCla1,:] - W22_tilde = W22[0:nCla1,:] - W22_hat = W22[nCla1,:] - loop_max=3000 - while (abs(cost_old - cost) > 10^-6 ) and ( count < loop_max ): - #%( abs(cost_old - cost) > 0.0001*cost ) && - cost_old = cost - count=count+1 - #M = bsxfun(@minus,W2*train_data,max(W2*train_data, [], 1)) - # 计算 W2_train_data 和 col_max - W2_train_data = np.dot(W2, train_data) - col_max = np.max(W2_train_data, axis=0) - M = W2_train_data - col_max - M = np.exp(M) - #p = bsxfun(@rdivide, M, sum(M)); - col_sum = np.sum(M, axis=0) - divisor = np.tile(col_sum, (M.shape[0], 1)) - p= np.divide(M, divisor) - cost = -1/nSam2 * train_label.flatten() @ np.log(p.flatten()).T + alpha * np.sum(np.square(W2))+beta *np.sum(np.square(W21_tilde.flatten()-W1.flatten())) - W21_tilde_grad = -1/nSam2 * (train_label[0:nCla1,:] - p[0:nCla1,:]) @ X_21.T + 2*alpha*W21_tilde + 2*beta * (W21_tilde-W1) - W21_hat_grad = -1/nSam2 * (train_label[nCla1,:] - p[nCla1,:]) @ X_21.T + 2*alpha*W21_hat - W22_grad = -1/nSam2 * (train_label - p) @ X_22.T + 2*alpha * W22 - W21_tilde = W21_tilde - eta*W21_tilde_grad - W21_hat = W21_hat - eta*W21_hat_grad - W22 = W22 - eta*W22_grad - W21=np.vstack(((W21_tilde),W21_hat)) - W2=np.hstack(((W21),W22)) - if object_value==[]: - np.mat(object_value.append(cost)) - else: - object_value=np.vstack(((object_value),cost)) - softmaxModel = W2 - return softmaxModel - -#%% -# 函数2、SAFC_ID函数 -def SAFC_ID(W1, train_data, train_label, alpha,beta, eta): -##%% Input: -#% W1: c*d1, a model learned in previous stage, and it will be reused in -#% current stage. W1 can be learned by using 'mysoftmax' function. -#% c and d1 are the number of classes and the dimension of data in -#% previous stage, respectively. -#% train_data: d2*n, where d2 is the dimension of data in current stage, d2 > d1. -#% n is the number of data in current stage. -#% train_label: (c+1)*n, each column is a one-hot vector. -#% alpha and beta: the hyperparameters in the model, alpha > 0, beta >0. -#% eta: the step size in the gradient descent algorithm. -##%% Output: -#% softmaxModel: (c+1)*d, the learned classifier . -##%% Our proposed method -#%set old cost and new cost value - cost_old = 0 #%set old cost and new cost value - cost = 1 - object_value = [] #% record the cost at each update iteration - count = 0 #% count the running number - nCla1,nFea1 = np.shape(W1) - nCla2,nSam2 = np.shape(train_label) - nFea2 = np.shape(train_data)[0] - X_21 = train_data[0:nFea1,:] - X_22 = train_data[nFea1:,:] - W2 = 0.005*np.ones((nCla2,nFea2)) #% Initialise classifier W - #%W2= mysoftmax(data_s2,label_s2,alpha_set, eta)[0] - W21 = W2[:,0:nFea1] - W22 = W2[:,nFea1:] - W21_tilde = W21[0:nCla1,:] - W21_hat = W21[nCla1,:] - W22_tilde = W22[0:nCla1,:] - W22_hat = W22[nCla1,:] - epsilon = 0.00001 - # % epsilon = 1; - loop_max=3000 - while (abs(cost_old - cost) > 10^-6 ) and ( count < loop_max ): - #%( abs(cost_old - cost) > 0.0001*cost ) && - cost_old = cost - count=count+1 - #M = bsxfun(@minus,W2*train_data,max(W2*train_data, [], 1)) - # 计算 W2_train_data 和 col_max - W2_train_data = np.dot(W2, train_data) - col_max = np.max(W2_train_data, axis=0) - M = W2_train_data - col_max - M = np.exp(M) - #p = bsxfun(@rdivide, M, sum(M)); - col_sum = np.sum(M, axis=0) - divisor = np.tile(col_sum, (M.shape[0], 1)) - p= np.divide(M, divisor) - reg = (W21_tilde@X_21-W1@X_21).T - norm_column = [] - for i in range(nSam2): - norm_column.append(np.linalg.norm(reg[i,:])) - norm_column=np.mat(norm_column) - MM = 1.0/(norm_column+epsilon) - #D = np.diag(MM.tolist()[0])#% D = eye(nSam2)Memory Error - - cost = -1/nSam2 * train_label.flatten() @ np.log(p.flatten()).T + alpha * np.sum(np.square(W2))+beta *np.sum(norm_column[0:]) - W21_tilde_grad = -1/nSam2 * (train_label[0:nCla1,:] - p[0:nCla1,:]) @ X_21.T + 2*alpha*W21_tilde + beta *(AxdiagB(X_21,MM)@(W21_tilde@X_21-W1@X_21).T).T - - W21_hat_grad = -1/nSam2 * (train_label[nCla1,:] - p[nCla1,:]) @ X_21.T + 2*alpha*W21_hat - W22_grad = -1/nSam2 * (train_label - p) @ X_22.T + 2*alpha * W22 - - W21_tilde = W21_tilde - eta*W21_tilde_grad - W21_hat = W21_hat - eta*W21_hat_grad - W22 = W22 - eta*W22_grad - W21=np.vstack(((W21_tilde),W21_hat)) - W2=np.hstack(((W21),W22)) - if object_value==[]: - np.mat(object_value.append(cost)) - else: - object_value=np.vstack(((object_value),cost)) - softmaxModel = W2 - - return softmaxModel \ No newline at end of file diff --git a/StreamLearn/tests/tests_SAFC/SAFC_Stream_Funcs.py b/StreamLearn/tests/tests_SAFC/SAFC_Stream_Funcs.py deleted file mode 100644 index 343167d277f31ab48bdc74f909b7202c146a2858..0000000000000000000000000000000000000000 --- a/StreamLearn/tests/tests_SAFC/SAFC_Stream_Funcs.py +++ /dev/null @@ -1,273 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -import time - -import numpy as np -import math - -from sklearn.preprocessing import StandardScaler -from sklearn import model_selection -from sklearn.metrics import * - -from compared_method import * -from predict import * -from SAFC import * -from MetricsPred import * -from validfunc_both import * -from Read_to_Python import * -from SAFC_Stream_Funcs import * - -from sklearn import svm -from sklearn.svm import SVC -from sklearn.svm import LinearSVC - -import scipy.io as io -from scipy.io import loadmat -import os -from joblib import dump,load - -# %% Adress and Pre-define -# 定义一个稀疏矩阵的方法 -def ind2vec(indices, num_classes): - indices = np.asarray(indices) - out = np.zeros((num_classes, np.shape(indices)[1])) - for i in range(num_classes): - index3 = np.where(i + 1 == indices)[1] - out[i, index3] = 1 - return out - -def datareconsrtuct(X,Y): - #从大到小排列Y - sorted_index=np.argsort(Y, axis=None) - newY=Y[sorted_index] - #相应替换X的位置 - newX=X[sorted_index] - return newX,newY - -# %% 定义第一阶段的读取训练与存储 -def SAFC_Stage1(PathSet,save_dir): - # 第一阶段数据读取与模型训练 - print('####eval####') - print("开始读取第一阶段数据!") - print('####eval####') - - X_past,Y_past=readbatchtoPython(PathSet) - X_past,Y_past=datareconsrtuct(X_past,Y_past) - Y_past=Y_past+1 - - print('####eval####') - print("第一阶段数据读取完成!") - print('####eval####') - - # Given and transform - data_s1=X_past - label_s1_vec=Y_past - label_s1 = ind2vec (Y_past.T, len(np.unique(Y_past))) - - # 归一化 - print('####eval####') - print("第一阶段数据归一化!") - print('####eval####') - scaler = StandardScaler() - data_s1 = scaler.fit_transform(data_s1) - print('####eval####') - print("第一阶段数据归一化完成!") - print('####eval####') - - # 第一阶段数据特征输出 - print('####eval####') - print("第一阶段数据:数据量:{},特征维度:{},类别数:{}".format(np.shape(data_s1)[0], np.shape(data_s1)[1], len(np.unique(Y_past)))) - print('####eval####') - - # 第一阶段模型训练与存储,用SVM训练第一阶段模型 - print('####eval####') - print("begin svm1 training!") - print('####eval####') - svm1 = LinearSVC() - svm1.fit(data_s1.tolist(), label_s1_vec.tolist()) - dump(svm1, save_dir+'/svm1.model') - print('####eval####') - print("end svm1 training!") - print('####eval####') - - return - -# %% 定义第二阶段的读取训练与存储 -def SAFC_Stage2(new_path,svm1,save_dir): - # Paras - eta = 0.1 - alpha_set = [10 ** -2, 10 ** -1, 10 ** 0] - alpha = 0.001 - beta = 0.01 - - # 第二阶段数据读取与模型训练 - print('####eval####') - print("开始读取第二阶段数据!") - print('####eval####') - - X_new,Y_new=readtopython(new_path) - X_new,Y_new=datareconsrtuct(X_new,Y_new) - Y_new=Y_new+1 - - print('####eval####') - print("第二阶段数据读取完成!") - print('####eval####') - - # Given and transform - data_s2=X_new - label_s2_vec=Y_new - label_s2 = ind2vec (Y_new.T, len(np.unique(Y_new))) - - # 归一化 - print('####eval####') - print("第二阶段数据归一化!") - print('####eval####') - scaler = StandardScaler() - data_s2 = scaler.fit_transform(data_s2) - print('####eval####') - print("第二阶段数据归一化完成!") - print('####eval####') - - # 第二阶段数据特征输出 - print('####eval####') - print("第二阶段涉及特征新增与类别新增!") - print('####eval####') - print('####eval####') - print("第二阶段数据:数据量:{},特征维度:{},类别数:{}".format(np.shape(data_s2)[0], np.shape(data_s2)[1], len(np.unique(Y_new)))) - print('####eval####') - - # 第二阶段模型训练与存储 - # 变体一 - alpha_best1, beta_best1 = validfunc(np.transpose(data_s2), label_s2, alpha_set, eta, "SAFC_D",np.mat(svm1.coef_)) - startours1 = time.time() - print('####eval####') - print("begin SAFC_D training!") - print('####eval####') - w_ours1 = SAFC_D(np.mat(svm1.coef_), np.transpose(data_s2), label_s2, alpha_best1, beta_best1, eta) - dump(w_ours1, save_dir+'/SAFC_D.model') - timeours1 = time.time() - startours1 - print('####eval####') - print(timeours1) - print('####eval####') - print('####eval####') - print("end SAFC_D training!") - print('####eval####') - - # 变体二 - alpha_best2,beta_best2=validfunc(np.transpose(data_s2),label_s2,alpha_set,eta,"SAFC_ID",np.mat(svm1.coef_)) - startours2=time.time() - print('####eval####') - print("begin SAFC_ID training!") - print('####eval####') - w_ours2 = SAFC_ID(np.mat(svm1.coef_),np.transpose(data_s2),label_s2,alpha_best2,beta_best2,eta) - dump(w_ours2, save_dir+'/SAFC_ID.model') - timeours2=time.time()-startours2 - print('####eval####') - print(timeours2) - print('####eval####') - print('####eval####') - print("end SAFC_ID training!") - print('####eval####') - - return - -# %% 定义测试数据读取与评估 -def SAFC_test(test_path,w_ours1,w_ours2): - # Metrics - Acc_ours1, Acc_ours2= [], [] - AUC_ours1, AUC_ours2 = [], [] - F1_weight_ours1, F1_weight_ours2 = [], [] - F1_macro_ours1, F1_macro_ours2 = [], [] - F1_micro_ours1, F1_micro_ours2 = [], [] - - # 测试数据读取与评估 - print('####eval####') - print("开始读取测试数据!") - print('####eval####') - - X_test,Y_test=readtopython(test_path) - X_test,Y_test=datareconsrtuct(X_test,Y_test) - Y_test=Y_test+1 - - print('####eval####') - print("测试数据读取完成!") - print('####eval####') - - # Given and transform - test_data=X_test - test_label_vec=Y_test - test_label = ind2vec (Y_test.T, len(np.unique(Y_test))) - - # 归一化 - print('####eval####') - print("测试数据归一化!") - print('####eval####') - scaler = StandardScaler() - test_data = scaler.fit_transform(test_data) - print('####eval####') - print("测试数据归一化完成!") - print('####eval####') - - # 测试数据特征输出 - print('####eval####') - print("测试数据:数据量:{},特征维度:{},类别数:{}".format(np.shape(test_data)[0], np.shape(test_data)[1], len(np.unique(Y_test)))) - print('####eval####') - - # Evaluation - # 变体一 - print('####eval####') - print("begin SAFC_D testing!") - print('####eval####') - pred1, acc_ours1, auc_ours1, f1wei_ours1, f1macro_ours1, f1micro_ours1 = Predict(w_ours1,np.transpose(test_data),test_label) - print('####eval####') - print("begin SAFC_D evaluation!") - print('####eval####') - Acc_ours1.append(acc_ours1) - AUC_ours1.append(auc_ours1) - F1_weight_ours1.append(f1wei_ours1) - F1_macro_ours1.append(f1macro_ours1) - F1_micro_ours1.append(f1micro_ours1) - - # 变体二 - print('####eval####') - print("begin SAFC_ID testing!") - print('####eval####') - pred2,acc_ours2,auc_ours2,f1wei_ours2,f1macro_ours2,f1micro_ours2 = Predict(w_ours2,np.transpose(test_data),test_label) - print('####eval####') - print("begin SAFC_ID evaluation!") - print('####eval####') - Acc_ours2.append(acc_ours2) - AUC_ours2.append(auc_ours2) - F1_weight_ours2.append(f1wei_ours2) - F1_macro_ours2.append(f1macro_ours2) - F1_micro_ours2.append(f1micro_ours2) - - # 结果输出 - print('####eval####') - print("预测与评估完成,输出评估结果!") - print('####eval####') - - meanAcc_ours1 = np.mean(Acc_ours1) - meanAcc_ours2 = np.mean(Acc_ours2) - meanAuc_ours1 = np.mean(AUC_ours1) - meanAuc_ours2 = np.mean(AUC_ours2) - meanF1_macro_ours1 = np.mean(F1_macro_ours1) - meanF1_macro_ours2 = np.mean(F1_macro_ours2) - meanF1_weight_ours1 = np.mean(F1_weight_ours1) - meanF1_weight_ours2 = np.mean(F1_weight_ours2) - meanF1_micro_ours1 = np.mean(F1_micro_ours1) - meanF1_micro_ours2 = np.mean(F1_micro_ours2) - - print('####eval####') - print( - 'meanAcc_ours1{:.4f},meanAcc_ours2{:.4f},meanAuc_ours1{:.4f},meanAuc_ours2{:.4f},meanF1_macro_ours1{:.4f},meanF1_macro_ours2{:.4f},meanF1_weight_ours1{:.4f},meanF1_weight_ours2{:.4f},meanF1_micro_ours1{:.4f},meanF1_micro_ours2{:.4f}' - .format(meanAcc_ours1,meanAcc_ours2, meanAuc_ours1,meanAuc_ours2,meanF1_macro_ours1,meanF1_macro_ours2,meanF1_weight_ours1,meanF1_weight_ours2,meanF1_micro_ours1,meanF1_micro_ours2)) - print('####eval####') - print('Finished!') - - return \ No newline at end of file diff --git a/StreamLearn/tests/tests_SAFC/compared_method.py b/StreamLearn/tests/tests_SAFC/compared_method.py deleted file mode 100644 index 613c1c2164f03b41f5c47f835da3894fece94d25..0000000000000000000000000000000000000000 --- a/StreamLearn/tests/tests_SAFC/compared_method.py +++ /dev/null @@ -1,242 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -import numpy as np -import math -from sklearn import model_selection - -from predict import * - -#%% slove Memory Error key -#redefine X_21@D: -def AxdiagB(A,B): - #A=d*N,B=1*N - BL=B.tolist()[0] - C=[] - for i in range(np.shape(A)[0]): - C_row=[] - for j in range(np.shape(A)[1]): - ele=BL[j]*A[i,j] - C_row.append(ele) - C.append(C_row) - C_mat=np.mat(C) - return C_mat - -# %% -# 函数1-1、mysoftmax函数中涉及到的find_best_w函数 -def find_best_w(train_data,train_label, alpha, eta): - [n_Fea,n_Sam] = np.shape(train_data) - nCla = np.shape(train_label)[0] - W = 0.005*np.ones((nCla,n_Fea)) - count=0 #% count the running number - #%set old cost and new cost value - cost_old=0 - cost=1 - object_value=[] #% record the cost at each update iteration - loop_max=3000 - while (abs(cost_old - cost) > 10^-6)and (count < loop_max ): - #%( abs(cost_old - cost) > 0.0001*cost ) && - cost_old = cost - count=count+1 - #M = bsxfun(@minus,W*train_data,max(W*train_data, [], 1)); - # 计算 W_train_data 和 col_max - W_train_data = np.dot(W, train_data) - col_max = np.max(W_train_data, axis=0) - M = W_train_data - col_max - M = np.exp(M) - #p = bsxfun(@rdivide, M, sum(M)); - col_sum = np.sum(M, axis=0) - divisor = np.tile(col_sum, (M.shape[0], 1)) - p= np.divide(M, divisor) - #.flatten()张成向量 - cost = -1/n_Sam * train_label.flatten() @ np.log(p.flatten()).T + alpha * np.sum(np.square(W)) - W_grad = -1/n_Sam * (train_label - p) @ train_data.T + 2*alpha * W - W = W - eta*W_grad - if object_value==[]: - object_value = np.vstack((np.tile(object_value, (cost.shape[1],1)).T, cost)) - else: - object_value=np.vstack(((object_value),cost)) - W1=W - return W1 - -#%% -# 函数1、mysoftmax函数 -def mysoftmax( train_data,train_label,alpha_set,eta): -##%% Input: -#% train_data: d*n, where d is the dimension of data. -#% n is the number of data. -#% train_label: c*n, each column is a one-hot vector. -#% alpha_set: the hyperparameters set, which is used for cross-validation. -#% eta: the step size in the gradient descent algorithm. -##%% Output: -#% softmaxModel: c*d, the learned classifier. -#% alpha_best: the best hyperparameter determined by cross-validation. -##%% cross validation - if len(alpha_set)>1: - alpha_num = len(alpha_set) - n = np.shape(train_data)[1] - k_fold = 5 - #Indices = crossvalind('Kfold', n, k_fold) - kf=model_selection.KFold(n_splits=5,shuffle=False,random_state=None) - acc_statistic = np.zeros((alpha_num,1)) - for train_index, test_index in kf.split(train_data.T): - #print("TRAIN:", train_index, "TEST:", test_index) - x_train, x_test = train_data.T[train_index].T, train_data.T[test_index].T - y_train, y_test = train_label.T[train_index].T, train_label.T[test_index].T - for i in range(1,alpha_num+1): - alpha = alpha_set[i-1] - w_i = find_best_w(x_train,y_train,alpha,eta) - acc_i= Predict(w_i,x_test, y_test)[1] - acc_statistic[i-1]= acc_statistic[i-1]+acc_i - alpha_position = np.argmax(acc_statistic) - alpha_best = alpha_set[alpha_position] - softmaxModel = find_best_w( train_data,train_label, alpha_best, eta) - else: - alpha_best = alpha_set[0] - #由于w_s2当中使用mysoftmax函数时候,传入的是浮点数alpha列表化之后的形式,所以这里要取一下浮点数 - softmaxModel = find_best_w( train_data,train_label, alpha_best, eta) - return softmaxModel,alpha_best - -#%% -# 函数2、对比方法文件夹中的Left1函数 -def Left1(W1,train_data,train_label,alpha,beta,eta): -##%% Input: -#% W1: c*d1, a model learned in previous stage, and it will be reused in -#% current stage. W1 can be learned by using 'mysoftmax' function. -#% c and d1 are the number of classes and the dimension of data in -#% previous stage, respectively. -#% train_data: d2*n, where d2 is the dimension of data in current stage, d2 > d1. -#% n is the number of data in current stage. -#% train_label: (c+1)*n, each column is a one-hot vector. -#% alpha and beta: the hyperparameters in the model, alpha > 0, beta >0. -#% eta: the step size in the gradient descent algorithm. -##%% Output: -#% softmaxModel: (c+1)*d, the learned classifier. -##%% Our proposed method - cost_old = 0 #%set old cost and new cost value - cost = 1 - object_value = [] #% record the cost at each update iteration - count = 0 #% count the running number - nCla1,nFea1 = np.shape(W1) - nCla2,nSam2 = np.shape(train_label) - nFea2 = np.shape(train_data)[0] - X_21 = train_data[0:nFea1,:] - X_22 = train_data[nFea1:,:] - W2 = 0.005*np.ones((nCla2,nFea2)) #% Initialise classifier W - #%W2= mysoftmax(data_s2,label_s2,alpha_set, eta)[0] - W21 = W2[:,0:nFea1] - W22 = W2[:,nFea1:] - W21_tilde = W21[0:nCla1,:] - W21_hat = W21[nCla1,:] - W22_tilde = W22[0:nCla1,:] - W22_hat = W22[nCla1,:] - loop_max=3000 - while (abs(cost_old - cost) > 10^-6 ) and ( count < loop_max ): - #%( abs(cost_old - cost) > 0.0001*cost ) && - cost_old = cost - count=count+1 - #M = bsxfun(@minus,W21*X_21,max(W21*X_21, [], 1)) - # 计算 W2_X_21 和 col_max - W21_X_21= np.dot(W21, X_21) - col_max = np.max(W21_X_21, axis=0) - M = W21_X_21 - col_max - M = np.exp(M) - #p = bsxfun(@rdivide, M, sum(M)); - col_sum = np.sum(M, axis=0) - divisor = np.tile(col_sum, (M.shape[0], 1)) - p= np.divide(M, divisor) - cost = -1/nSam2 * train_label.flatten() @ np.log(p.flatten()).T + alpha * np.sum(np.square(W21.flatten()))+beta *np.sum(np.square(W21_tilde.flatten()-W1.flatten())) - W21_tilde_grad = -1/nSam2 * (train_label[0:nCla1,:] - p[0:nCla1,:]) @ X_21.T + 2*alpha*W21_tilde + 2*beta * (W21_tilde-W1) - W21_hat_grad = -1/nSam2 * (train_label[nCla1,:] - p[nCla1,:]) @ X_21.T + 2*alpha*W21_hat - - W21_tilde = W21_tilde - eta*W21_tilde_grad - W21_hat = W21_hat - eta*W21_hat_grad - - W21=np.vstack(((W21_tilde),W21_hat)) - - if object_value==[]: - object_value = np.vstack((np.tile(object_value, (cost.shape[1],1)).T, cost)) - else: - object_value=np.vstack(((object_value),cost)) - - softmaxModel = W21 - return softmaxModel - -#%% -# 函数3、对比方法文件夹中的Left2函数 -def Left2(W1, train_data, train_label, alpha,beta, eta): -##%% Input: -#% W1: c*d1, a model learned in previous stage, and it will be reused in -#% current stage. W1 can be learned by using 'mysoftmax' function. -#% c and d1 are the number of classes and the dimension of data in -#% previous stage, respectively. -#% train_data: d2*n, where d2 is the dimension of data in current stage, d2 > d1. -#% n is the number of data in current stage. -#% train_label: (c+1)*n, each column is a one-hot vector. -#% alpha and beta: the hyperparameters in the model, alpha > 0, beta >0. -#% eta: the step size in the gradient descent algorithm. -##%% Output: -#% softmaxModel: (c+1)*d, the learned classifier . -##%% Our proposed method -#%set old cost and new cost value - cost_old = 0 #%set old cost and new cost value - cost = 1 - object_value = [] #% record the cost at each update iteration - count = 0 #% count the running number - nCla1,nFea1 = np.shape(W1) - nCla2,nSam2 = np.shape(train_label) - nFea2 = np.shape(train_data)[0] - X_21 = train_data[0:nFea1,:] - X_22 = train_data[nFea1:,:] - W2 = 0.005*np.ones((nCla2,nFea2)) #% Initialise classifier W - #%W2= mysoftmax(data_s2,label_s2,alpha_set, eta)[0] - W21 = W2[:,0:nFea1] - W22 = W2[:,nFea1:] - W21_tilde = W21[0:nCla1,:] - W21_hat = W21[nCla1,:] - W22_tilde = W22[0:nCla1,:] - W22_hat = W22[nCla1,:] - epsilon = 0.00001 - # % epsilon = 1; - loop_max=3000 - while (abs(cost_old - cost) > 10^-6 ) and ( count < loop_max ): - #%( abs(cost_old - cost) > 0.0001*cost ) && - cost_old = cost - count=count+1 - #M = bsxfun(@minus,W21*X_21,max(W21*X_21, [], 1)) - # 计算 W2_X_21 和 col_max - W21_X_21= np.dot(W21, X_21) - col_max = np.max(W21_X_21, axis=0) - M = W21_X_21 - col_max - M = np.exp(M) - #p = bsxfun(@rdivide, M, sum(M)); - col_sum = np.sum(M, axis=0) - divisor = np.tile(col_sum, (M.shape[0], 1)) - p= np.divide(M, divisor) - reg = (W21_tilde@X_21-W1@X_21).T - norm_column = [] - for i in range(nSam2): - norm_column.append(np.linalg.norm(reg[i,:])) - norm_column=np.mat(norm_column) - MM = 1.0/(norm_column+epsilon) - #D = np.diag(MM.tolist()[0])#% D = eye(nSam2); Memory Error - cost = -1/nSam2 * train_label.flatten() @ np.log(p.flatten()).T + alpha * np.sum(np.square(W21.flatten()))+beta *np.sum(norm_column[0:]) - W21_tilde_grad = -1/nSam2 * (train_label[0:nCla1,:] - p[0:nCla1,:]) @ X_21.T + 2*alpha*W21_tilde + beta *(AxdiagB(X_21,MM)@(W21_tilde@X_21-W1@X_21).T).T - W21_hat_grad = -1/nSam2 * (train_label[nCla1,:] - p[nCla1,:]) @ X_21.T + 2*alpha*W21_hat - - W21_tilde = W21_tilde - eta*W21_tilde_grad - W21_hat = W21_hat - eta*W21_hat_grad - - W21=np.vstack(((W21_tilde),W21_hat)) - - if object_value==[]: - object_value = np.vstack((np.tile(object_value, (cost.shape[1],1)).T, cost)) - else: - object_value=np.vstack(((object_value),cost)) - - softmaxModel = W21 - return softmaxModel diff --git a/StreamLearn/tests/tests_SAFC/predict.py b/StreamLearn/tests/tests_SAFC/predict.py deleted file mode 100644 index 9b75edfdaca514f31a992128e6a05fb13f7bc54b..0000000000000000000000000000000000000000 --- a/StreamLearn/tests/tests_SAFC/predict.py +++ /dev/null @@ -1,49 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -import numpy as np -import math -from sklearn.metrics import * - -#%% -# 函数、Predict函数 -def Predict(softmaxModel, test_data,test_label_ori): -##%% Input: -#% softmaxModel: the learned multi-class classifier. -#% test_data: the d*n input data matrix, where each column test_data(:, i) corresponds to -#% a single test set -#% test_label: the c*n input label matrix -#% our code should produce the prediction matrix -##%% Output: -#% pred: a n-dimension vector, where pred(i) is the prediction for the i-th test_data -#% acc: the testing accuracy. - pred = np.zeros((1, np.shape(test_data)[1])) -##%% ---------- SoftmaxPredict -------------------------------------- -#% Compute pred assuming that the labels start from 1. - test_data = test_data[0:np.shape(softmaxModel)[1],:] - #M = bsxfun(@minus,,max(softmaxModel*test_data,[],1)); - softmaxModel_test_data = softmaxModel@test_data - col_max = np.max(softmaxModel_test_data, axis=0) - M = softmaxModel_test_data - col_max - M = np.exp(M) - #predall = bsxfun(@rdivide, M, sum(M)) - col_sum = np.sum(M, axis=0) - divisor = np.tile(col_sum, (M.shape[0], 1)) - predall= np.divide(M, divisor) - pred = np.argmax(predall,axis=0)+1 - probability = np.max(predall,axis=0) - test_label = np.argmax(test_label_ori, axis=0) + 1 - acc = np.mean(test_label.flatten()== pred.flatten()) - AUC=roc_auc_score(test_label_ori.tolist(),predall.tolist()) - F1_weight=f1_score(test_label.tolist(),pred.tolist()[0], average='weighted') - F1_macro=f1_score(test_label.tolist(),pred.tolist()[0], average='macro') - F1_micro=f1_score(test_label.tolist(),pred.tolist()[0], average='micro') - #AUC=1 - #F1_weight=1 - #F1_macro=1 - #F1_micro=1 - return pred,acc,AUC,F1_weight,F1_macro,F1_micro \ No newline at end of file diff --git a/StreamLearn/tests/tests_SAFC/request_import.py b/StreamLearn/tests/tests_SAFC/request_import.py deleted file mode 100644 index 071fdbad0102cf41a4e0b0e38a72938ea8786474..0000000000000000000000000000000000000000 --- a/StreamLearn/tests/tests_SAFC/request_import.py +++ /dev/null @@ -1,33 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -#本实验需要用到的库 -import time - -import numpy as np -import math - -from sklearn.preprocessing import StandardScaler -from sklearn import model_selection -from sklearn.metrics import * - -from compared_method import * -from predict import * -from SAFC import * -from MetricsPred import * -from validfunc_both import * -from Read_to_Python import * -from SAFC_Stream_Funcs import * - -from sklearn import svm -from sklearn.svm import SVC -from sklearn.svm import LinearSVC - -import scipy.io as io -from scipy.io import loadmat -import os -from joblib import dump,load \ No newline at end of file diff --git a/StreamLearn/tests/tests_SAFC/validfunc_both.py b/StreamLearn/tests/tests_SAFC/validfunc_both.py deleted file mode 100644 index 1fe346ed4ae17652ca9c21ac218e81cf4c2a458a..0000000000000000000000000000000000000000 --- a/StreamLearn/tests/tests_SAFC/validfunc_both.py +++ /dev/null @@ -1,42 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -import numpy as np -from SAFC import * -from sklearn import model_selection -from sklearn.metrics import * -from predict import * - -#%% -def validfunc(train_data,train_label,alpha_set,eta,method,W1): - n = np.shape(train_data)[1] - k_fold = 5 - kf=model_selection.KFold(n_splits=5,shuffle=False,random_state=None) - both_statistic = np.zeros((len(alpha_set),len(alpha_set))) - for train_index, test_index in kf.split(train_data.T): - x_train, x_test = train_data.T[train_index].T, train_data.T[test_index].T - y_train, y_test = train_label.T[train_index].T, train_label.T[test_index].T - for i in range(1,len(alpha_set)+1): - for j in range(1,len(alpha_set)+1): - alpha = alpha_set[i-1] - beta = alpha_set[j-1] - if method=="SAFC_D": - w_ours1 = SAFC_D(W1,x_train,y_train,alpha,beta,eta) - both_i_j=Predict(w_ours1,x_test, y_test)[1]+Predict(w_ours1,x_test, y_test)[2] - both_statistic[i-1,j-1]= both_statistic[i-1,j-1]+both_i_j - elif method=="SAFC_ID": - w_ours2 = SAFC_ID(W1,x_train,y_train,alpha,beta,eta) - both_i_j=Predict(w_ours2,x_test, y_test)[1]+Predict(w_ours2,x_test, y_test)[2] - both_statistic[i-1,j-1]= both_statistic[i-1,j-1]+both_i_j - else: - print("Error!") - - alpha_position=int(np.argmax(both_statistic)/len(alpha_set)) - beta_position=np.argmax(both_statistic)%len(alpha_set) - alpha_best = alpha_set[alpha_position] - beta_best = alpha_set[beta_position] - return alpha_best,beta_best \ No newline at end of file