From 0cf652bfaff0928e7644a65d146b5b07f70df5a3 Mon Sep 17 00:00:00 2001 From: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> Date: Fri, 22 Nov 2024 16:38:25 +0000 Subject: [PATCH 01/20] update README.md. Signed-off-by: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> --- README.md | 345 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 282 insertions(+), 63 deletions(-) diff --git a/README.md b/README.md index 437fb7e..b82970c 100644 --- a/README.md +++ b/README.md @@ -339,7 +339,10 @@ python StreamLearn/tests/test_ODS.py --data PATH_TO_DATA --checkpoint PATH_TO_CH - StreamLearn/Algorithm/Algorithm_SAFC/SAFC_Stream_Funcs.py -首先,获取算法所需数据集: +首先,根据代码配置文件StreamLearn/Algorithm/Algorithm_SAFC/request_import.py安装库完成环境配置 + + +然后,获取算法所需数据集: 地址: 通过百度网盘分享的文件:SAFC_datasets_CIFAR10.zip @@ -347,83 +350,299 @@ python StreamLearn/tests/test_ODS.py --data PATH_TO_DATA --checkpoint PATH_TO_CH 提取码:nudt 说明: -本实验需要用到的数据集是CIFAR10,本例将data_batch_1.mat至data_batch_4.mat中的数据作为第一阶段数据;data_batch_2.mat中的数据作为第二阶段的数据;test_batch.mat中的数据是测试数据 - +本实验需要用到的数据集是CIFAR10,将部分batch中的数据作为第一阶段数据;部分数据作为第二阶段的数据;test_batch.mat中的数据是测试数据 -然后,根据代码配置文件StreamLearn/Algorithm/Algorithm_SAFC/request_import.py安装库完成环境配置 +流式数据读取,传入数据地址: +```python +def __init__(self,args_address): + # 第一阶段、第二阶段、测试main函数 + # 存储地址 + self.save_dir=args_address.save_dir + + # 读取地址 + self.PathSet=args_address.PathSet + self.new_path=args_address.new_path + self.test_path=args_address.test_path +``` 其次,调用SAFC算法进行训练: -SAFC训练基于第一阶段复用的SVM模型,因此先训练SVM,并保存训练好的SVM模型: -```python -print('####eval####') -print("begin svm1 training!") -print('####eval####') -svm1 = SVC(probability=True,kernel="linear",decision_function_shape='ovo') -svm1.fit(data_s1.tolist(), label_s1_vec.tolist()) -dump(svm1, save_dir+'/svm1.model') -print('####eval####') -print("end svm1 training!") -print('####eval####') - -# 复用SVM,训练SAFC_D,并保存训练后的模型: -print('####eval####') -print("begin SAFC_D training!") -print('####eval####') -w_ours1 = SAFC_D(np.mat(svm1._get_coef()), np.transpose(data_s2), label_s2, alpha_best1, beta_best1, eta) -dump(w_ours1, save_dir+'/SAFC_D.model') - -# 复用SVM,训练SAFC_ID,并保存训练后的模型: -print('####eval####') -print("begin SAFC_ID training!") -print('####eval####') -w_ours2 = SAFC_ID(np.mat(svm1._get_coef()),np.transpose(data_s2),label_s2,alpha_best2,beta_best2,eta) -dump(w_ours2, save_dir+'/SAFC_ID.model') +```python +def stream_fit(self): + # 第一阶段数据读取与训练 + SAFC_Stage1(self.PathSet,self.save_dir) + # 第一阶段模型提取 + print('####eval####') + print("Read stage1 model!") + print('####eval####') + stage1model=load(self.save_dir+'/svm1.model') + print('####eval####') + print("Have read stage1 model!") + print('####eval####') + + # 第二阶段数据读取与训练 + SAFC_Stage2(self.new_path,stage1model,self.save_dir) + # 第二阶段模型提取 + # 变体一 + print('####eval####') + print("Read stage2-SAFC_D model!") + print('####eval####') + stage2model1=load(self.save_dir+'/SAFC_D.model') + print('####eval####') + print("Have read stage2-SAFC_D model!") + print('####eval####') + # 变体二 + print('####eval####') + print("Read stage2-SAFC_D model!") + print('####eval####') + stage2model2=load(self.save_dir+'/SAFC_ID.model') + print('####eval####') + print("Have read stage2-SAFC_D model!") + print('####eval####') +``` + +具体而言,SAFC训练基于第一阶段复用的SVM模型,因此先训练SVM,并保存训练好的SVM模型: +```python +def SAFC_Stage1(PathSet,save_dir): + # 第一阶段数据读取与模型训练 + print('####eval####') + print("开始读取第一阶段数据!") + print('####eval####') + + X_past,Y_past=readbatchtoPython(PathSet) + X_past,Y_past=datareconsrtuct(X_past,Y_past) + Y_past=Y_past+1 + + print('####eval####') + print("第一阶段数据读取完成!") + print('####eval####') + + # Given and transform + data_s1=X_past + label_s1_vec=Y_past + label_s1 = ind2vec (Y_past.T, len(np.unique(Y_past))) + + # 归一化 + print('####eval####') + print("第一阶段数据归一化!") + print('####eval####') + scaler = StandardScaler() + data_s1 = scaler.fit_transform(data_s1) + print('####eval####') + print("第一阶段数据归一化完成!") + print('####eval####') + + # 第一阶段数据特征输出 + print('####eval####') + print("第一阶段数据:数据量:{},特征维度:{},类别数:{}".format(np.shape(data_s1)[0], np.shape(data_s1)[1], len(np.unique(Y_past)))) + print('####eval####') + + # 第一阶段模型训练与存储,用SVM训练第一阶段模型 + print('####eval####') + print("begin svm1 training!") + print('####eval####') + svm1 = LinearSVC() + svm1.fit(data_s1.tolist(), label_s1_vec.tolist()) + dump(svm1, save_dir+'/svm1.model') + print('####eval####') + print("end svm1 training!") + print('####eval####') + + return +``` + +复用SVM,在第二阶段训练SAFC_D和SAFC_ID两个变体,并保存训练后的模型: +```python +def SAFC_Stage2(new_path,svm1,save_dir): + # Paras + eta = 0.1 + alpha_set = [10 ** -2, 10 ** -1, 10 ** 0] + alpha = 0.001 + beta = 0.01 + + # 第二阶段数据读取与模型训练 + print('####eval####') + print("开始读取第二阶段数据!") + print('####eval####') + + X_new,Y_new=readtopython(new_path) + X_new,Y_new=datareconsrtuct(X_new,Y_new) + Y_new=Y_new+1 + + print('####eval####') + print("第二阶段数据读取完成!") + print('####eval####') + + # Given and transform + data_s2=X_new + label_s2_vec=Y_new + label_s2 = ind2vec (Y_new.T, len(np.unique(Y_new))) + + # 归一化 + print('####eval####') + print("第二阶段数据归一化!") + print('####eval####') + scaler = StandardScaler() + data_s2 = scaler.fit_transform(data_s2) + print('####eval####') + print("第二阶段数据归一化完成!") + print('####eval####') + + # 第二阶段数据特征输出 + print('####eval####') + print("第二阶段涉及特征新增与类别新增!") + print('####eval####') + print('####eval####') + print("第二阶段数据:数据量:{},特征维度:{},类别数:{}".format(np.shape(data_s2)[0], np.shape(data_s2)[1], len(np.unique(Y_new)))) + print('####eval####') + + # 第二阶段模型训练与存储 + # 变体一 + alpha_best1, beta_best1 = validfunc(np.transpose(data_s2), label_s2, alpha_set, eta, "SAFC_D",np.mat(svm1.coef_)) + startours1 = time.time() + print('####eval####') + print("begin SAFC_D training!") + print('####eval####') + w_ours1 = SAFC_D(np.mat(svm1.coef_), np.transpose(data_s2), label_s2, alpha_best1, beta_best1, eta) + dump(w_ours1, save_dir+'/SAFC_D.model') + timeours1 = time.time() - startours1 + print('####eval####') + print(timeours1) + print('####eval####') + print('####eval####') + print("end SAFC_D training!") + print('####eval####') + + # 变体二 + alpha_best2,beta_best2=validfunc(np.transpose(data_s2),label_s2,alpha_set,eta,"SAFC_ID",np.mat(svm1.coef_)) + startours2=time.time() + print('####eval####') + print("begin SAFC_ID training!") + print('####eval####') + w_ours2 = SAFC_ID(np.mat(svm1.coef_),np.transpose(data_s2),label_s2,alpha_best2,beta_best2,eta) + dump(w_ours2, save_dir+'/SAFC_ID.model') + timeours2=time.time()-startours2 + print('####eval####') + print(timeours2) + print('####eval####') + print('####eval####') + print("end SAFC_ID training!") + print('####eval####') + + return # 其中,alpha_best1, beta_best1, alpha_best2, beta_best2, eta是超参数 ``` -最后,分别对两个模型进行性能测试: +最后,读取测试数据并完成测试与性能评估: ```python -# SAFC_D: -print('####eval####') -print("begin SAFC_D testing!") -print('####eval####') -pred1, acc_ours1, auc_ours1, f1wei_ours1, f1macro_ours1, f1micro_ours1 = Predict(w_ours1,np.transpose(test_data),test_label) -print('####eval####') -print("begin SAFC_D evaluation!") -print('####eval####') -Acc_ours1.append(acc_ours1) -AUC_ours1.append(auc_ours1) -F1_weight_ours1.append(f1wei_ours1) -F1_macro_ours1.append(f1macro_ours1) -F1_micro_ours1.append(f1micro_ours1) - -# SAFC_ID: -print('####eval####') -print("begin SAFC_ID testing!") -print('####eval####') -pred2,acc_ours2,auc_ours2,f1wei_ours2,f1macro_ours2,f1micro_ours2 = Predict(w_ours2,np.transpose(test_data),test_label) -print('####eval####') -print("begin SAFC_ID evaluation!") -print('####eval####') -Acc_ours2.append(acc_ours2) -AUC_ours2.append(auc_ours2) -F1_weight_ours2.append(f1wei_ours2) -F1_macro_ours2.append(f1macro_ours2) -F1_micro_ours2.append(f1micro_ours2) +def stream_evaluate(self): + # 测试数据读取与评估 + SAFC_test(self.test_path,stage2model1,stage2model2) ``` - -在test文件中,按照不同的数据流场景,提供两个main文件 -- StreamLearn/tests/tests_SAFC/CIFAR10_SAFC_DandSAFC_ID_allFunc_main.py -对应一次性读取两阶段数据并完成测试 -- StreamLearn/tests/tests_SAFC/CIFAR10_SAFC_DandSAFC_ID_useFunc_main.py -对应分批次读取两阶段数据,存储模型,读取测试数据,调用模型并完成测试 +具体而言, +```python +def SAFC_test(test_path,w_ours1,w_ours2): + # Metrics + Acc_ours1, Acc_ours2= [], [] + AUC_ours1, AUC_ours2 = [], [] + F1_weight_ours1, F1_weight_ours2 = [], [] + F1_macro_ours1, F1_macro_ours2 = [], [] + F1_micro_ours1, F1_micro_ours2 = [], [] + + # 测试数据读取与评估 + print('####eval####') + print("开始读取测试数据!") + print('####eval####') + + X_test,Y_test=readtopython(test_path) + X_test,Y_test=datareconsrtuct(X_test,Y_test) + Y_test=Y_test+1 + + print('####eval####') + print("测试数据读取完成!") + print('####eval####') + + # Given and transform + test_data=X_test + test_label_vec=Y_test + test_label = ind2vec (Y_test.T, len(np.unique(Y_test))) + + # 归一化 + print('####eval####') + print("测试数据归一化!") + print('####eval####') + scaler = StandardScaler() + test_data = scaler.fit_transform(test_data) + print('####eval####') + print("测试数据归一化完成!") + print('####eval####') + + # 测试数据特征输出 + print('####eval####') + print("测试数据:数据量:{},特征维度:{},类别数:{}".format(np.shape(test_data)[0], np.shape(test_data)[1], len(np.unique(Y_test)))) + print('####eval####') + + # Evaluation + # 变体一 + print('####eval####') + print("begin SAFC_D testing!") + print('####eval####') + pred1, acc_ours1, auc_ours1, f1wei_ours1, f1macro_ours1, f1micro_ours1 = Predict(w_ours1,np.transpose(test_data),test_label) + print('####eval####') + print("begin SAFC_D evaluation!") + print('####eval####') + Acc_ours1.append(acc_ours1) + AUC_ours1.append(auc_ours1) + F1_weight_ours1.append(f1wei_ours1) + F1_macro_ours1.append(f1macro_ours1) + F1_micro_ours1.append(f1micro_ours1) + + # 变体二 + print('####eval####') + print("begin SAFC_ID testing!") + print('####eval####') + pred2,acc_ours2,auc_ours2,f1wei_ours2,f1macro_ours2,f1micro_ours2 = Predict(w_ours2,np.transpose(test_data),test_label) + print('####eval####') + print("begin SAFC_ID evaluation!") + print('####eval####') + Acc_ours2.append(acc_ours2) + AUC_ours2.append(auc_ours2) + F1_weight_ours2.append(f1wei_ours2) + F1_macro_ours2.append(f1macro_ours2) + F1_micro_ours2.append(f1micro_ours2) + + # 结果输出 + print('####eval####') + print("预测与评估完成,输出评估结果!") + print('####eval####') + + meanAcc_ours1 = np.mean(Acc_ours1) + meanAcc_ours2 = np.mean(Acc_ours2) + meanAuc_ours1 = np.mean(AUC_ours1) + meanAuc_ours2 = np.mean(AUC_ours2) + meanF1_macro_ours1 = np.mean(F1_macro_ours1) + meanF1_macro_ours2 = np.mean(F1_macro_ours2) + meanF1_weight_ours1 = np.mean(F1_weight_ours1) + meanF1_weight_ours2 = np.mean(F1_weight_ours2) + meanF1_micro_ours1 = np.mean(F1_micro_ours1) + meanF1_micro_ours2 = np.mean(F1_micro_ours2) + + print('####eval####') + print( + 'meanAcc_ours1{:.4f},meanAcc_ours2{:.4f},meanAuc_ours1{:.4f},meanAuc_ours2{:.4f},meanF1_macro_ours1{:.4f},meanF1_macro_ours2{:.4f},meanF1_weight_ours1{:.4f},meanF1_weight_ours2{:.4f},meanF1_micro_ours1{:.4f},meanF1_micro_ours2{:.4f}' + .format(meanAcc_ours1,meanAcc_ours2, meanAuc_ours1,meanAuc_ours2,meanF1_macro_ours1,meanF1_macro_ours2,meanF1_weight_ours1,meanF1_weight_ours2,meanF1_micro_ours1,meanF1_micro_ours2)) + print('####eval####') + print('Finished!') + + return +``` ModelsIN用于存放训练获得的分类器 +测试主文件为test_SAFC.py ## 课题四 -- Gitee From 13bf002cf01e8104724aecd0a5fe1f4667a3f7ad Mon Sep 17 00:00:00 2001 From: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> Date: Fri, 22 Nov 2024 16:41:45 +0000 Subject: [PATCH 02/20] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20St?= =?UTF-8?q?reamLearn/Algorithm/Algorithm=5FSAFC/MetricsPred.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Algorithm/Algorithm_SAFC/MetricsPred.py | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 StreamLearn/Algorithm/Algorithm_SAFC/MetricsPred.py diff --git a/StreamLearn/Algorithm/Algorithm_SAFC/MetricsPred.py b/StreamLearn/Algorithm/Algorithm_SAFC/MetricsPred.py deleted file mode 100644 index 84c6359..0000000 --- a/StreamLearn/Algorithm/Algorithm_SAFC/MetricsPred.py +++ /dev/null @@ -1,19 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -import numpy as np -from sklearn.metrics import * - -# %% -def MetricsPred(test_label,test_label_mat,pred,prob): - - acc = np.mean(test_label.flatten()== pred.flatten()) - AUC=roc_auc_score(test_label_mat.tolist(),prob.tolist()) - F1_weight=f1_score(test_label.tolist(),pred.tolist(), average='weighted') - F1_macro=f1_score(test_label.tolist(),pred.tolist(), average='macro') - F1_micro=f1_score(test_label.tolist(),pred.tolist(), average='micro') - return acc,AUC,F1_weight,F1_macro,F1_micro \ No newline at end of file -- Gitee From c32aa1912ace848c1b107c64a9136f52303a5bda Mon Sep 17 00:00:00 2001 From: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> Date: Fri, 22 Nov 2024 16:41:52 +0000 Subject: [PATCH 03/20] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20St?= =?UTF-8?q?reamLearn/Algorithm/Algorithm=5FSAFC/Read=5Fto=5FPython.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Algorithm_SAFC/Read_to_Python.py | 41 ------------------- 1 file changed, 41 deletions(-) delete mode 100644 StreamLearn/Algorithm/Algorithm_SAFC/Read_to_Python.py diff --git a/StreamLearn/Algorithm/Algorithm_SAFC/Read_to_Python.py b/StreamLearn/Algorithm/Algorithm_SAFC/Read_to_Python.py deleted file mode 100644 index c8486ba..0000000 --- a/StreamLearn/Algorithm/Algorithm_SAFC/Read_to_Python.py +++ /dev/null @@ -1,41 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -import numpy as np -from scipy.io import loadmat - -# %% -# tips -# import os -# print(f"当前工作目录: {os.getcwd()}") - -# 数据接口 -def readtopython(path): - data_part=loadmat(path) - X_part=data_part["data"].astype(np.float64) - Y_part=data_part["labels"].astype(np.float64) - return X_part,Y_part - -# Batch数据接口 -def readbatchtoPython(Path): - for path_index in range(len(Path)): - path=Path[path_index] - X_part,Y_part=readtopython(path) - if path_index==0: - X_past_original=X_part - Y_past_original=Y_part - if path_index!=0: - X_past_original=np.vstack((X_past_original,X_part)) - Y_past_original=np.vstack((Y_past_original,Y_part)) - # 第一阶段的数据没有No.9,即第10类数据 - Position_past=np.where(Y_past_original!=9)[0] - Y_past=Y_past_original[Position_past] - # 特征不含B通道的1024维特征 - X_past=X_past_original[Position_past,0:2048] - return X_past,Y_past - - -- Gitee From a1a986568b2e09d6d9d934d8234548baa614ec1e Mon Sep 17 00:00:00 2001 From: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> Date: Fri, 22 Nov 2024 16:42:09 +0000 Subject: [PATCH 04/20] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20St?= =?UTF-8?q?reamLearn/Algorithm/Algorithm=5FSAFC/SAFC.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- StreamLearn/Algorithm/Algorithm_SAFC/SAFC.py | 165 ------------------- 1 file changed, 165 deletions(-) delete mode 100644 StreamLearn/Algorithm/Algorithm_SAFC/SAFC.py diff --git a/StreamLearn/Algorithm/Algorithm_SAFC/SAFC.py b/StreamLearn/Algorithm/Algorithm_SAFC/SAFC.py deleted file mode 100644 index dbf74f4..0000000 --- a/StreamLearn/Algorithm/Algorithm_SAFC/SAFC.py +++ /dev/null @@ -1,165 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -import numpy as np -import math - -#%% slove Memory Error key -#redefine X_21@D: -def AxdiagB(A,B): - #A=d*N,B=1*N - BL=B.tolist()[0] - C=[] - for i in range(np.shape(A)[0]): - C_row=[] - for j in range(np.shape(A)[1]): - ele=BL[j]*A[i,j] - C_row.append(ele) - C.append(C_row) - C_mat=np.mat(C) - return C_mat -#%% -# 函数1、SAFC_D -def SAFC_D(W1,train_data,train_label,alpha,beta,eta): -##%% Input: -#% W1: c*d1, a model learned in previous stage, and it will be reused in -#% current stage. W1 can be learned by using 'mysoftmax' function. -#% c and d1 are the number of classes and the dimension of data in -#% previous stage, respectively. -#% train_data: d2*n, where d2 is the dimension of data in current stage, d2 > d1. -#% n is the number of data in current stage. -#% train_label: (c+1)*n, each column is a one-hot vector. -#% alpha and beta: the hyperparameters in the model, alpha > 0, beta >0. -#% eta: the step size in the gradient descent algorithm. -##%% Output: -#% softmaxModel: (c+1)*d, the learned classifier. -##%% Our proposed method - cost_old = 0 #%set old cost and new cost value - cost = 1 - object_value = [] #% record the cost at each update iteration - count = 0 #% count the running number - nCla1,nFea1 = np.shape(W1) - nCla2,nSam2 = np.shape(train_label) - nFea2 = np.shape(train_data)[0] - X_21 = train_data[0:nFea1,:] - X_22 = train_data[nFea1:,:] - W2 = 0.005*np.ones((nCla2,nFea2)) #% Initialise classifier W - #%W2= mysoftmax(data_s2,label_s2,alpha_set, eta)[0] - W21 = W2[:,0:nFea1] - W22 = W2[:,nFea1:] - W21_tilde = W21[0:nCla1,:] - W21_hat = W21[nCla1,:] - W22_tilde = W22[0:nCla1,:] - W22_hat = W22[nCla1,:] - loop_max=3000 - while (abs(cost_old - cost) > 10^-6 ) and ( count < loop_max ): - #%( abs(cost_old - cost) > 0.0001*cost ) && - cost_old = cost - count=count+1 - #M = bsxfun(@minus,W2*train_data,max(W2*train_data, [], 1)) - # 计算 W2_train_data 和 col_max - W2_train_data = np.dot(W2, train_data) - col_max = np.max(W2_train_data, axis=0) - M = W2_train_data - col_max - M = np.exp(M) - #p = bsxfun(@rdivide, M, sum(M)); - col_sum = np.sum(M, axis=0) - divisor = np.tile(col_sum, (M.shape[0], 1)) - p= np.divide(M, divisor) - cost = -1/nSam2 * train_label.flatten() @ np.log(p.flatten()).T + alpha * np.sum(np.square(W2))+beta *np.sum(np.square(W21_tilde.flatten()-W1.flatten())) - W21_tilde_grad = -1/nSam2 * (train_label[0:nCla1,:] - p[0:nCla1,:]) @ X_21.T + 2*alpha*W21_tilde + 2*beta * (W21_tilde-W1) - W21_hat_grad = -1/nSam2 * (train_label[nCla1,:] - p[nCla1,:]) @ X_21.T + 2*alpha*W21_hat - W22_grad = -1/nSam2 * (train_label - p) @ X_22.T + 2*alpha * W22 - W21_tilde = W21_tilde - eta*W21_tilde_grad - W21_hat = W21_hat - eta*W21_hat_grad - W22 = W22 - eta*W22_grad - W21=np.vstack(((W21_tilde),W21_hat)) - W2=np.hstack(((W21),W22)) - if object_value==[]: - np.mat(object_value.append(cost)) - else: - object_value=np.vstack(((object_value),cost)) - softmaxModel = W2 - return softmaxModel - -#%% -# 函数2、SAFC_ID函数 -def SAFC_ID(W1, train_data, train_label, alpha,beta, eta): -##%% Input: -#% W1: c*d1, a model learned in previous stage, and it will be reused in -#% current stage. W1 can be learned by using 'mysoftmax' function. -#% c and d1 are the number of classes and the dimension of data in -#% previous stage, respectively. -#% train_data: d2*n, where d2 is the dimension of data in current stage, d2 > d1. -#% n is the number of data in current stage. -#% train_label: (c+1)*n, each column is a one-hot vector. -#% alpha and beta: the hyperparameters in the model, alpha > 0, beta >0. -#% eta: the step size in the gradient descent algorithm. -##%% Output: -#% softmaxModel: (c+1)*d, the learned classifier . -##%% Our proposed method -#%set old cost and new cost value - cost_old = 0 #%set old cost and new cost value - cost = 1 - object_value = [] #% record the cost at each update iteration - count = 0 #% count the running number - nCla1,nFea1 = np.shape(W1) - nCla2,nSam2 = np.shape(train_label) - nFea2 = np.shape(train_data)[0] - X_21 = train_data[0:nFea1,:] - X_22 = train_data[nFea1:,:] - W2 = 0.005*np.ones((nCla2,nFea2)) #% Initialise classifier W - #%W2= mysoftmax(data_s2,label_s2,alpha_set, eta)[0] - W21 = W2[:,0:nFea1] - W22 = W2[:,nFea1:] - W21_tilde = W21[0:nCla1,:] - W21_hat = W21[nCla1,:] - W22_tilde = W22[0:nCla1,:] - W22_hat = W22[nCla1,:] - epsilon = 0.00001 - # % epsilon = 1; - loop_max=3000 - while (abs(cost_old - cost) > 10^-6 ) and ( count < loop_max ): - #%( abs(cost_old - cost) > 0.0001*cost ) && - cost_old = cost - count=count+1 - #M = bsxfun(@minus,W2*train_data,max(W2*train_data, [], 1)) - # 计算 W2_train_data 和 col_max - W2_train_data = np.dot(W2, train_data) - col_max = np.max(W2_train_data, axis=0) - M = W2_train_data - col_max - M = np.exp(M) - #p = bsxfun(@rdivide, M, sum(M)); - col_sum = np.sum(M, axis=0) - divisor = np.tile(col_sum, (M.shape[0], 1)) - p= np.divide(M, divisor) - reg = (W21_tilde@X_21-W1@X_21).T - norm_column = [] - for i in range(nSam2): - norm_column.append(np.linalg.norm(reg[i,:])) - norm_column=np.mat(norm_column) - MM = 1.0/(norm_column+epsilon) - #D = np.diag(MM.tolist()[0])#% D = eye(nSam2)Memory Error - - cost = -1/nSam2 * train_label.flatten() @ np.log(p.flatten()).T + alpha * np.sum(np.square(W2))+beta *np.sum(norm_column[0:]) - W21_tilde_grad = -1/nSam2 * (train_label[0:nCla1,:] - p[0:nCla1,:]) @ X_21.T + 2*alpha*W21_tilde + beta *(AxdiagB(X_21,MM)@(W21_tilde@X_21-W1@X_21).T).T - - W21_hat_grad = -1/nSam2 * (train_label[nCla1,:] - p[nCla1,:]) @ X_21.T + 2*alpha*W21_hat - W22_grad = -1/nSam2 * (train_label - p) @ X_22.T + 2*alpha * W22 - - W21_tilde = W21_tilde - eta*W21_tilde_grad - W21_hat = W21_hat - eta*W21_hat_grad - W22 = W22 - eta*W22_grad - W21=np.vstack(((W21_tilde),W21_hat)) - W2=np.hstack(((W21),W22)) - if object_value==[]: - np.mat(object_value.append(cost)) - else: - object_value=np.vstack(((object_value),cost)) - softmaxModel = W2 - - return softmaxModel \ No newline at end of file -- Gitee From d34f677b721ce43b3f1ad505134c590e534042a2 Mon Sep 17 00:00:00 2001 From: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> Date: Fri, 22 Nov 2024 16:42:17 +0000 Subject: [PATCH 05/20] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20St?= =?UTF-8?q?reamLearn/Algorithm/Algorithm=5FSAFC/SAFC=5FStream=5FFuncs.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Algorithm_SAFC/SAFC_Stream_Funcs.py | 273 ------------------ 1 file changed, 273 deletions(-) delete mode 100644 StreamLearn/Algorithm/Algorithm_SAFC/SAFC_Stream_Funcs.py diff --git a/StreamLearn/Algorithm/Algorithm_SAFC/SAFC_Stream_Funcs.py b/StreamLearn/Algorithm/Algorithm_SAFC/SAFC_Stream_Funcs.py deleted file mode 100644 index 343167d..0000000 --- a/StreamLearn/Algorithm/Algorithm_SAFC/SAFC_Stream_Funcs.py +++ /dev/null @@ -1,273 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -import time - -import numpy as np -import math - -from sklearn.preprocessing import StandardScaler -from sklearn import model_selection -from sklearn.metrics import * - -from compared_method import * -from predict import * -from SAFC import * -from MetricsPred import * -from validfunc_both import * -from Read_to_Python import * -from SAFC_Stream_Funcs import * - -from sklearn import svm -from sklearn.svm import SVC -from sklearn.svm import LinearSVC - -import scipy.io as io -from scipy.io import loadmat -import os -from joblib import dump,load - -# %% Adress and Pre-define -# 定义一个稀疏矩阵的方法 -def ind2vec(indices, num_classes): - indices = np.asarray(indices) - out = np.zeros((num_classes, np.shape(indices)[1])) - for i in range(num_classes): - index3 = np.where(i + 1 == indices)[1] - out[i, index3] = 1 - return out - -def datareconsrtuct(X,Y): - #从大到小排列Y - sorted_index=np.argsort(Y, axis=None) - newY=Y[sorted_index] - #相应替换X的位置 - newX=X[sorted_index] - return newX,newY - -# %% 定义第一阶段的读取训练与存储 -def SAFC_Stage1(PathSet,save_dir): - # 第一阶段数据读取与模型训练 - print('####eval####') - print("开始读取第一阶段数据!") - print('####eval####') - - X_past,Y_past=readbatchtoPython(PathSet) - X_past,Y_past=datareconsrtuct(X_past,Y_past) - Y_past=Y_past+1 - - print('####eval####') - print("第一阶段数据读取完成!") - print('####eval####') - - # Given and transform - data_s1=X_past - label_s1_vec=Y_past - label_s1 = ind2vec (Y_past.T, len(np.unique(Y_past))) - - # 归一化 - print('####eval####') - print("第一阶段数据归一化!") - print('####eval####') - scaler = StandardScaler() - data_s1 = scaler.fit_transform(data_s1) - print('####eval####') - print("第一阶段数据归一化完成!") - print('####eval####') - - # 第一阶段数据特征输出 - print('####eval####') - print("第一阶段数据:数据量:{},特征维度:{},类别数:{}".format(np.shape(data_s1)[0], np.shape(data_s1)[1], len(np.unique(Y_past)))) - print('####eval####') - - # 第一阶段模型训练与存储,用SVM训练第一阶段模型 - print('####eval####') - print("begin svm1 training!") - print('####eval####') - svm1 = LinearSVC() - svm1.fit(data_s1.tolist(), label_s1_vec.tolist()) - dump(svm1, save_dir+'/svm1.model') - print('####eval####') - print("end svm1 training!") - print('####eval####') - - return - -# %% 定义第二阶段的读取训练与存储 -def SAFC_Stage2(new_path,svm1,save_dir): - # Paras - eta = 0.1 - alpha_set = [10 ** -2, 10 ** -1, 10 ** 0] - alpha = 0.001 - beta = 0.01 - - # 第二阶段数据读取与模型训练 - print('####eval####') - print("开始读取第二阶段数据!") - print('####eval####') - - X_new,Y_new=readtopython(new_path) - X_new,Y_new=datareconsrtuct(X_new,Y_new) - Y_new=Y_new+1 - - print('####eval####') - print("第二阶段数据读取完成!") - print('####eval####') - - # Given and transform - data_s2=X_new - label_s2_vec=Y_new - label_s2 = ind2vec (Y_new.T, len(np.unique(Y_new))) - - # 归一化 - print('####eval####') - print("第二阶段数据归一化!") - print('####eval####') - scaler = StandardScaler() - data_s2 = scaler.fit_transform(data_s2) - print('####eval####') - print("第二阶段数据归一化完成!") - print('####eval####') - - # 第二阶段数据特征输出 - print('####eval####') - print("第二阶段涉及特征新增与类别新增!") - print('####eval####') - print('####eval####') - print("第二阶段数据:数据量:{},特征维度:{},类别数:{}".format(np.shape(data_s2)[0], np.shape(data_s2)[1], len(np.unique(Y_new)))) - print('####eval####') - - # 第二阶段模型训练与存储 - # 变体一 - alpha_best1, beta_best1 = validfunc(np.transpose(data_s2), label_s2, alpha_set, eta, "SAFC_D",np.mat(svm1.coef_)) - startours1 = time.time() - print('####eval####') - print("begin SAFC_D training!") - print('####eval####') - w_ours1 = SAFC_D(np.mat(svm1.coef_), np.transpose(data_s2), label_s2, alpha_best1, beta_best1, eta) - dump(w_ours1, save_dir+'/SAFC_D.model') - timeours1 = time.time() - startours1 - print('####eval####') - print(timeours1) - print('####eval####') - print('####eval####') - print("end SAFC_D training!") - print('####eval####') - - # 变体二 - alpha_best2,beta_best2=validfunc(np.transpose(data_s2),label_s2,alpha_set,eta,"SAFC_ID",np.mat(svm1.coef_)) - startours2=time.time() - print('####eval####') - print("begin SAFC_ID training!") - print('####eval####') - w_ours2 = SAFC_ID(np.mat(svm1.coef_),np.transpose(data_s2),label_s2,alpha_best2,beta_best2,eta) - dump(w_ours2, save_dir+'/SAFC_ID.model') - timeours2=time.time()-startours2 - print('####eval####') - print(timeours2) - print('####eval####') - print('####eval####') - print("end SAFC_ID training!") - print('####eval####') - - return - -# %% 定义测试数据读取与评估 -def SAFC_test(test_path,w_ours1,w_ours2): - # Metrics - Acc_ours1, Acc_ours2= [], [] - AUC_ours1, AUC_ours2 = [], [] - F1_weight_ours1, F1_weight_ours2 = [], [] - F1_macro_ours1, F1_macro_ours2 = [], [] - F1_micro_ours1, F1_micro_ours2 = [], [] - - # 测试数据读取与评估 - print('####eval####') - print("开始读取测试数据!") - print('####eval####') - - X_test,Y_test=readtopython(test_path) - X_test,Y_test=datareconsrtuct(X_test,Y_test) - Y_test=Y_test+1 - - print('####eval####') - print("测试数据读取完成!") - print('####eval####') - - # Given and transform - test_data=X_test - test_label_vec=Y_test - test_label = ind2vec (Y_test.T, len(np.unique(Y_test))) - - # 归一化 - print('####eval####') - print("测试数据归一化!") - print('####eval####') - scaler = StandardScaler() - test_data = scaler.fit_transform(test_data) - print('####eval####') - print("测试数据归一化完成!") - print('####eval####') - - # 测试数据特征输出 - print('####eval####') - print("测试数据:数据量:{},特征维度:{},类别数:{}".format(np.shape(test_data)[0], np.shape(test_data)[1], len(np.unique(Y_test)))) - print('####eval####') - - # Evaluation - # 变体一 - print('####eval####') - print("begin SAFC_D testing!") - print('####eval####') - pred1, acc_ours1, auc_ours1, f1wei_ours1, f1macro_ours1, f1micro_ours1 = Predict(w_ours1,np.transpose(test_data),test_label) - print('####eval####') - print("begin SAFC_D evaluation!") - print('####eval####') - Acc_ours1.append(acc_ours1) - AUC_ours1.append(auc_ours1) - F1_weight_ours1.append(f1wei_ours1) - F1_macro_ours1.append(f1macro_ours1) - F1_micro_ours1.append(f1micro_ours1) - - # 变体二 - print('####eval####') - print("begin SAFC_ID testing!") - print('####eval####') - pred2,acc_ours2,auc_ours2,f1wei_ours2,f1macro_ours2,f1micro_ours2 = Predict(w_ours2,np.transpose(test_data),test_label) - print('####eval####') - print("begin SAFC_ID evaluation!") - print('####eval####') - Acc_ours2.append(acc_ours2) - AUC_ours2.append(auc_ours2) - F1_weight_ours2.append(f1wei_ours2) - F1_macro_ours2.append(f1macro_ours2) - F1_micro_ours2.append(f1micro_ours2) - - # 结果输出 - print('####eval####') - print("预测与评估完成,输出评估结果!") - print('####eval####') - - meanAcc_ours1 = np.mean(Acc_ours1) - meanAcc_ours2 = np.mean(Acc_ours2) - meanAuc_ours1 = np.mean(AUC_ours1) - meanAuc_ours2 = np.mean(AUC_ours2) - meanF1_macro_ours1 = np.mean(F1_macro_ours1) - meanF1_macro_ours2 = np.mean(F1_macro_ours2) - meanF1_weight_ours1 = np.mean(F1_weight_ours1) - meanF1_weight_ours2 = np.mean(F1_weight_ours2) - meanF1_micro_ours1 = np.mean(F1_micro_ours1) - meanF1_micro_ours2 = np.mean(F1_micro_ours2) - - print('####eval####') - print( - 'meanAcc_ours1{:.4f},meanAcc_ours2{:.4f},meanAuc_ours1{:.4f},meanAuc_ours2{:.4f},meanF1_macro_ours1{:.4f},meanF1_macro_ours2{:.4f},meanF1_weight_ours1{:.4f},meanF1_weight_ours2{:.4f},meanF1_micro_ours1{:.4f},meanF1_micro_ours2{:.4f}' - .format(meanAcc_ours1,meanAcc_ours2, meanAuc_ours1,meanAuc_ours2,meanF1_macro_ours1,meanF1_macro_ours2,meanF1_weight_ours1,meanF1_weight_ours2,meanF1_micro_ours1,meanF1_micro_ours2)) - print('####eval####') - print('Finished!') - - return \ No newline at end of file -- Gitee From ccc70c0b9c237ebcecf62b90e5bc08c99bf8df9a Mon Sep 17 00:00:00 2001 From: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> Date: Fri, 22 Nov 2024 16:42:23 +0000 Subject: [PATCH 06/20] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20St?= =?UTF-8?q?reamLearn/Algorithm/Algorithm=5FSAFC/predict.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Algorithm/Algorithm_SAFC/predict.py | 49 ------------------- 1 file changed, 49 deletions(-) delete mode 100644 StreamLearn/Algorithm/Algorithm_SAFC/predict.py diff --git a/StreamLearn/Algorithm/Algorithm_SAFC/predict.py b/StreamLearn/Algorithm/Algorithm_SAFC/predict.py deleted file mode 100644 index 9b75edf..0000000 --- a/StreamLearn/Algorithm/Algorithm_SAFC/predict.py +++ /dev/null @@ -1,49 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -import numpy as np -import math -from sklearn.metrics import * - -#%% -# 函数、Predict函数 -def Predict(softmaxModel, test_data,test_label_ori): -##%% Input: -#% softmaxModel: the learned multi-class classifier. -#% test_data: the d*n input data matrix, where each column test_data(:, i) corresponds to -#% a single test set -#% test_label: the c*n input label matrix -#% our code should produce the prediction matrix -##%% Output: -#% pred: a n-dimension vector, where pred(i) is the prediction for the i-th test_data -#% acc: the testing accuracy. - pred = np.zeros((1, np.shape(test_data)[1])) -##%% ---------- SoftmaxPredict -------------------------------------- -#% Compute pred assuming that the labels start from 1. - test_data = test_data[0:np.shape(softmaxModel)[1],:] - #M = bsxfun(@minus,,max(softmaxModel*test_data,[],1)); - softmaxModel_test_data = softmaxModel@test_data - col_max = np.max(softmaxModel_test_data, axis=0) - M = softmaxModel_test_data - col_max - M = np.exp(M) - #predall = bsxfun(@rdivide, M, sum(M)) - col_sum = np.sum(M, axis=0) - divisor = np.tile(col_sum, (M.shape[0], 1)) - predall= np.divide(M, divisor) - pred = np.argmax(predall,axis=0)+1 - probability = np.max(predall,axis=0) - test_label = np.argmax(test_label_ori, axis=0) + 1 - acc = np.mean(test_label.flatten()== pred.flatten()) - AUC=roc_auc_score(test_label_ori.tolist(),predall.tolist()) - F1_weight=f1_score(test_label.tolist(),pred.tolist()[0], average='weighted') - F1_macro=f1_score(test_label.tolist(),pred.tolist()[0], average='macro') - F1_micro=f1_score(test_label.tolist(),pred.tolist()[0], average='micro') - #AUC=1 - #F1_weight=1 - #F1_macro=1 - #F1_micro=1 - return pred,acc,AUC,F1_weight,F1_macro,F1_micro \ No newline at end of file -- Gitee From 639ec1254634132d3a995596df07603217617ad0 Mon Sep 17 00:00:00 2001 From: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> Date: Fri, 22 Nov 2024 16:45:10 +0000 Subject: [PATCH 07/20] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20St?= =?UTF-8?q?reamLearn/Algorithm/Algorithm=5FSAFC/compared=5Fmethod.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Algorithm_SAFC/compared_method.py | 242 ------------------ 1 file changed, 242 deletions(-) delete mode 100644 StreamLearn/Algorithm/Algorithm_SAFC/compared_method.py diff --git a/StreamLearn/Algorithm/Algorithm_SAFC/compared_method.py b/StreamLearn/Algorithm/Algorithm_SAFC/compared_method.py deleted file mode 100644 index 613c1c2..0000000 --- a/StreamLearn/Algorithm/Algorithm_SAFC/compared_method.py +++ /dev/null @@ -1,242 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -import numpy as np -import math -from sklearn import model_selection - -from predict import * - -#%% slove Memory Error key -#redefine X_21@D: -def AxdiagB(A,B): - #A=d*N,B=1*N - BL=B.tolist()[0] - C=[] - for i in range(np.shape(A)[0]): - C_row=[] - for j in range(np.shape(A)[1]): - ele=BL[j]*A[i,j] - C_row.append(ele) - C.append(C_row) - C_mat=np.mat(C) - return C_mat - -# %% -# 函数1-1、mysoftmax函数中涉及到的find_best_w函数 -def find_best_w(train_data,train_label, alpha, eta): - [n_Fea,n_Sam] = np.shape(train_data) - nCla = np.shape(train_label)[0] - W = 0.005*np.ones((nCla,n_Fea)) - count=0 #% count the running number - #%set old cost and new cost value - cost_old=0 - cost=1 - object_value=[] #% record the cost at each update iteration - loop_max=3000 - while (abs(cost_old - cost) > 10^-6)and (count < loop_max ): - #%( abs(cost_old - cost) > 0.0001*cost ) && - cost_old = cost - count=count+1 - #M = bsxfun(@minus,W*train_data,max(W*train_data, [], 1)); - # 计算 W_train_data 和 col_max - W_train_data = np.dot(W, train_data) - col_max = np.max(W_train_data, axis=0) - M = W_train_data - col_max - M = np.exp(M) - #p = bsxfun(@rdivide, M, sum(M)); - col_sum = np.sum(M, axis=0) - divisor = np.tile(col_sum, (M.shape[0], 1)) - p= np.divide(M, divisor) - #.flatten()张成向量 - cost = -1/n_Sam * train_label.flatten() @ np.log(p.flatten()).T + alpha * np.sum(np.square(W)) - W_grad = -1/n_Sam * (train_label - p) @ train_data.T + 2*alpha * W - W = W - eta*W_grad - if object_value==[]: - object_value = np.vstack((np.tile(object_value, (cost.shape[1],1)).T, cost)) - else: - object_value=np.vstack(((object_value),cost)) - W1=W - return W1 - -#%% -# 函数1、mysoftmax函数 -def mysoftmax( train_data,train_label,alpha_set,eta): -##%% Input: -#% train_data: d*n, where d is the dimension of data. -#% n is the number of data. -#% train_label: c*n, each column is a one-hot vector. -#% alpha_set: the hyperparameters set, which is used for cross-validation. -#% eta: the step size in the gradient descent algorithm. -##%% Output: -#% softmaxModel: c*d, the learned classifier. -#% alpha_best: the best hyperparameter determined by cross-validation. -##%% cross validation - if len(alpha_set)>1: - alpha_num = len(alpha_set) - n = np.shape(train_data)[1] - k_fold = 5 - #Indices = crossvalind('Kfold', n, k_fold) - kf=model_selection.KFold(n_splits=5,shuffle=False,random_state=None) - acc_statistic = np.zeros((alpha_num,1)) - for train_index, test_index in kf.split(train_data.T): - #print("TRAIN:", train_index, "TEST:", test_index) - x_train, x_test = train_data.T[train_index].T, train_data.T[test_index].T - y_train, y_test = train_label.T[train_index].T, train_label.T[test_index].T - for i in range(1,alpha_num+1): - alpha = alpha_set[i-1] - w_i = find_best_w(x_train,y_train,alpha,eta) - acc_i= Predict(w_i,x_test, y_test)[1] - acc_statistic[i-1]= acc_statistic[i-1]+acc_i - alpha_position = np.argmax(acc_statistic) - alpha_best = alpha_set[alpha_position] - softmaxModel = find_best_w( train_data,train_label, alpha_best, eta) - else: - alpha_best = alpha_set[0] - #由于w_s2当中使用mysoftmax函数时候,传入的是浮点数alpha列表化之后的形式,所以这里要取一下浮点数 - softmaxModel = find_best_w( train_data,train_label, alpha_best, eta) - return softmaxModel,alpha_best - -#%% -# 函数2、对比方法文件夹中的Left1函数 -def Left1(W1,train_data,train_label,alpha,beta,eta): -##%% Input: -#% W1: c*d1, a model learned in previous stage, and it will be reused in -#% current stage. W1 can be learned by using 'mysoftmax' function. -#% c and d1 are the number of classes and the dimension of data in -#% previous stage, respectively. -#% train_data: d2*n, where d2 is the dimension of data in current stage, d2 > d1. -#% n is the number of data in current stage. -#% train_label: (c+1)*n, each column is a one-hot vector. -#% alpha and beta: the hyperparameters in the model, alpha > 0, beta >0. -#% eta: the step size in the gradient descent algorithm. -##%% Output: -#% softmaxModel: (c+1)*d, the learned classifier. -##%% Our proposed method - cost_old = 0 #%set old cost and new cost value - cost = 1 - object_value = [] #% record the cost at each update iteration - count = 0 #% count the running number - nCla1,nFea1 = np.shape(W1) - nCla2,nSam2 = np.shape(train_label) - nFea2 = np.shape(train_data)[0] - X_21 = train_data[0:nFea1,:] - X_22 = train_data[nFea1:,:] - W2 = 0.005*np.ones((nCla2,nFea2)) #% Initialise classifier W - #%W2= mysoftmax(data_s2,label_s2,alpha_set, eta)[0] - W21 = W2[:,0:nFea1] - W22 = W2[:,nFea1:] - W21_tilde = W21[0:nCla1,:] - W21_hat = W21[nCla1,:] - W22_tilde = W22[0:nCla1,:] - W22_hat = W22[nCla1,:] - loop_max=3000 - while (abs(cost_old - cost) > 10^-6 ) and ( count < loop_max ): - #%( abs(cost_old - cost) > 0.0001*cost ) && - cost_old = cost - count=count+1 - #M = bsxfun(@minus,W21*X_21,max(W21*X_21, [], 1)) - # 计算 W2_X_21 和 col_max - W21_X_21= np.dot(W21, X_21) - col_max = np.max(W21_X_21, axis=0) - M = W21_X_21 - col_max - M = np.exp(M) - #p = bsxfun(@rdivide, M, sum(M)); - col_sum = np.sum(M, axis=0) - divisor = np.tile(col_sum, (M.shape[0], 1)) - p= np.divide(M, divisor) - cost = -1/nSam2 * train_label.flatten() @ np.log(p.flatten()).T + alpha * np.sum(np.square(W21.flatten()))+beta *np.sum(np.square(W21_tilde.flatten()-W1.flatten())) - W21_tilde_grad = -1/nSam2 * (train_label[0:nCla1,:] - p[0:nCla1,:]) @ X_21.T + 2*alpha*W21_tilde + 2*beta * (W21_tilde-W1) - W21_hat_grad = -1/nSam2 * (train_label[nCla1,:] - p[nCla1,:]) @ X_21.T + 2*alpha*W21_hat - - W21_tilde = W21_tilde - eta*W21_tilde_grad - W21_hat = W21_hat - eta*W21_hat_grad - - W21=np.vstack(((W21_tilde),W21_hat)) - - if object_value==[]: - object_value = np.vstack((np.tile(object_value, (cost.shape[1],1)).T, cost)) - else: - object_value=np.vstack(((object_value),cost)) - - softmaxModel = W21 - return softmaxModel - -#%% -# 函数3、对比方法文件夹中的Left2函数 -def Left2(W1, train_data, train_label, alpha,beta, eta): -##%% Input: -#% W1: c*d1, a model learned in previous stage, and it will be reused in -#% current stage. W1 can be learned by using 'mysoftmax' function. -#% c and d1 are the number of classes and the dimension of data in -#% previous stage, respectively. -#% train_data: d2*n, where d2 is the dimension of data in current stage, d2 > d1. -#% n is the number of data in current stage. -#% train_label: (c+1)*n, each column is a one-hot vector. -#% alpha and beta: the hyperparameters in the model, alpha > 0, beta >0. -#% eta: the step size in the gradient descent algorithm. -##%% Output: -#% softmaxModel: (c+1)*d, the learned classifier . -##%% Our proposed method -#%set old cost and new cost value - cost_old = 0 #%set old cost and new cost value - cost = 1 - object_value = [] #% record the cost at each update iteration - count = 0 #% count the running number - nCla1,nFea1 = np.shape(W1) - nCla2,nSam2 = np.shape(train_label) - nFea2 = np.shape(train_data)[0] - X_21 = train_data[0:nFea1,:] - X_22 = train_data[nFea1:,:] - W2 = 0.005*np.ones((nCla2,nFea2)) #% Initialise classifier W - #%W2= mysoftmax(data_s2,label_s2,alpha_set, eta)[0] - W21 = W2[:,0:nFea1] - W22 = W2[:,nFea1:] - W21_tilde = W21[0:nCla1,:] - W21_hat = W21[nCla1,:] - W22_tilde = W22[0:nCla1,:] - W22_hat = W22[nCla1,:] - epsilon = 0.00001 - # % epsilon = 1; - loop_max=3000 - while (abs(cost_old - cost) > 10^-6 ) and ( count < loop_max ): - #%( abs(cost_old - cost) > 0.0001*cost ) && - cost_old = cost - count=count+1 - #M = bsxfun(@minus,W21*X_21,max(W21*X_21, [], 1)) - # 计算 W2_X_21 和 col_max - W21_X_21= np.dot(W21, X_21) - col_max = np.max(W21_X_21, axis=0) - M = W21_X_21 - col_max - M = np.exp(M) - #p = bsxfun(@rdivide, M, sum(M)); - col_sum = np.sum(M, axis=0) - divisor = np.tile(col_sum, (M.shape[0], 1)) - p= np.divide(M, divisor) - reg = (W21_tilde@X_21-W1@X_21).T - norm_column = [] - for i in range(nSam2): - norm_column.append(np.linalg.norm(reg[i,:])) - norm_column=np.mat(norm_column) - MM = 1.0/(norm_column+epsilon) - #D = np.diag(MM.tolist()[0])#% D = eye(nSam2); Memory Error - cost = -1/nSam2 * train_label.flatten() @ np.log(p.flatten()).T + alpha * np.sum(np.square(W21.flatten()))+beta *np.sum(norm_column[0:]) - W21_tilde_grad = -1/nSam2 * (train_label[0:nCla1,:] - p[0:nCla1,:]) @ X_21.T + 2*alpha*W21_tilde + beta *(AxdiagB(X_21,MM)@(W21_tilde@X_21-W1@X_21).T).T - W21_hat_grad = -1/nSam2 * (train_label[nCla1,:] - p[nCla1,:]) @ X_21.T + 2*alpha*W21_hat - - W21_tilde = W21_tilde - eta*W21_tilde_grad - W21_hat = W21_hat - eta*W21_hat_grad - - W21=np.vstack(((W21_tilde),W21_hat)) - - if object_value==[]: - object_value = np.vstack((np.tile(object_value, (cost.shape[1],1)).T, cost)) - else: - object_value=np.vstack(((object_value),cost)) - - softmaxModel = W21 - return softmaxModel -- Gitee From dfa0b08db78aca3d4094dae3db9b9452941416aa Mon Sep 17 00:00:00 2001 From: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> Date: Fri, 22 Nov 2024 16:45:15 +0000 Subject: [PATCH 08/20] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20St?= =?UTF-8?q?reamLearn/Algorithm/Algorithm=5FSAFC/request=5Fimport.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Algorithm_SAFC/request_import.py | 33 ------------------- 1 file changed, 33 deletions(-) delete mode 100644 StreamLearn/Algorithm/Algorithm_SAFC/request_import.py diff --git a/StreamLearn/Algorithm/Algorithm_SAFC/request_import.py b/StreamLearn/Algorithm/Algorithm_SAFC/request_import.py deleted file mode 100644 index 071fdba..0000000 --- a/StreamLearn/Algorithm/Algorithm_SAFC/request_import.py +++ /dev/null @@ -1,33 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -#本实验需要用到的库 -import time - -import numpy as np -import math - -from sklearn.preprocessing import StandardScaler -from sklearn import model_selection -from sklearn.metrics import * - -from compared_method import * -from predict import * -from SAFC import * -from MetricsPred import * -from validfunc_both import * -from Read_to_Python import * -from SAFC_Stream_Funcs import * - -from sklearn import svm -from sklearn.svm import SVC -from sklearn.svm import LinearSVC - -import scipy.io as io -from scipy.io import loadmat -import os -from joblib import dump,load \ No newline at end of file -- Gitee From 43b80339a0818c1a861b634a5e85086e1104b736 Mon Sep 17 00:00:00 2001 From: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> Date: Fri, 22 Nov 2024 16:45:27 +0000 Subject: [PATCH 09/20] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20St?= =?UTF-8?q?reamLearn/Algorithm/Algorithm=5FSAFC/validfunc=5Fboth.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Algorithm_SAFC/validfunc_both.py | 42 ------------------- 1 file changed, 42 deletions(-) delete mode 100644 StreamLearn/Algorithm/Algorithm_SAFC/validfunc_both.py diff --git a/StreamLearn/Algorithm/Algorithm_SAFC/validfunc_both.py b/StreamLearn/Algorithm/Algorithm_SAFC/validfunc_both.py deleted file mode 100644 index 1fe346e..0000000 --- a/StreamLearn/Algorithm/Algorithm_SAFC/validfunc_both.py +++ /dev/null @@ -1,42 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -import numpy as np -from SAFC import * -from sklearn import model_selection -from sklearn.metrics import * -from predict import * - -#%% -def validfunc(train_data,train_label,alpha_set,eta,method,W1): - n = np.shape(train_data)[1] - k_fold = 5 - kf=model_selection.KFold(n_splits=5,shuffle=False,random_state=None) - both_statistic = np.zeros((len(alpha_set),len(alpha_set))) - for train_index, test_index in kf.split(train_data.T): - x_train, x_test = train_data.T[train_index].T, train_data.T[test_index].T - y_train, y_test = train_label.T[train_index].T, train_label.T[test_index].T - for i in range(1,len(alpha_set)+1): - for j in range(1,len(alpha_set)+1): - alpha = alpha_set[i-1] - beta = alpha_set[j-1] - if method=="SAFC_D": - w_ours1 = SAFC_D(W1,x_train,y_train,alpha,beta,eta) - both_i_j=Predict(w_ours1,x_test, y_test)[1]+Predict(w_ours1,x_test, y_test)[2] - both_statistic[i-1,j-1]= both_statistic[i-1,j-1]+both_i_j - elif method=="SAFC_ID": - w_ours2 = SAFC_ID(W1,x_train,y_train,alpha,beta,eta) - both_i_j=Predict(w_ours2,x_test, y_test)[1]+Predict(w_ours2,x_test, y_test)[2] - both_statistic[i-1,j-1]= both_statistic[i-1,j-1]+both_i_j - else: - print("Error!") - - alpha_position=int(np.argmax(both_statistic)/len(alpha_set)) - beta_position=np.argmax(both_statistic)%len(alpha_set) - alpha_best = alpha_set[alpha_position] - beta_best = alpha_set[beta_position] - return alpha_best,beta_best \ No newline at end of file -- Gitee From 50cf0bbc63c26b567dea0c3ee9fea98ff080e385 Mon Sep 17 00:00:00 2001 From: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> Date: Fri, 22 Nov 2024 16:47:01 +0000 Subject: [PATCH 10/20] =?UTF-8?q?=E6=96=B0=E5=BB=BA=20Algorithm=5FSAFC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- StreamLearn/Algorithm/Algorithm_SAFC/.keep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 StreamLearn/Algorithm/Algorithm_SAFC/.keep diff --git a/StreamLearn/Algorithm/Algorithm_SAFC/.keep b/StreamLearn/Algorithm/Algorithm_SAFC/.keep new file mode 100644 index 0000000..e69de29 -- Gitee From 5f197ea7771bb094c654bfdcedeb4cc4b22ddc23 Mon Sep 17 00:00:00 2001 From: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> Date: Fri, 22 Nov 2024 16:48:18 +0000 Subject: [PATCH 11/20] update Signed-off-by: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> --- .../Class_CIFAR10_SAFC_DandSAFC_ID.py | 83 ++++++ .../Algorithm/Algorithm_SAFC/MetricsPred.py | 19 ++ .../Algorithm_SAFC/Read_to_Python.py | 57 ++++ StreamLearn/Algorithm/Algorithm_SAFC/SAFC.py | 165 +++++++++++ .../Algorithm_SAFC/SAFC_Stream_Funcs.py | 275 ++++++++++++++++++ .../Algorithm_SAFC/compared_method.py | 242 +++++++++++++++ .../Algorithm/Algorithm_SAFC/predict.py | 49 ++++ .../Algorithm_SAFC/request_import.py | 37 +++ .../Algorithm_SAFC/validfunc_both.py | 42 +++ 9 files changed, 969 insertions(+) create mode 100644 StreamLearn/Algorithm/Algorithm_SAFC/Class_CIFAR10_SAFC_DandSAFC_ID.py create mode 100644 StreamLearn/Algorithm/Algorithm_SAFC/MetricsPred.py create mode 100644 StreamLearn/Algorithm/Algorithm_SAFC/Read_to_Python.py create mode 100644 StreamLearn/Algorithm/Algorithm_SAFC/SAFC.py create mode 100644 StreamLearn/Algorithm/Algorithm_SAFC/SAFC_Stream_Funcs.py create mode 100644 StreamLearn/Algorithm/Algorithm_SAFC/compared_method.py create mode 100644 StreamLearn/Algorithm/Algorithm_SAFC/predict.py create mode 100644 StreamLearn/Algorithm/Algorithm_SAFC/request_import.py create mode 100644 StreamLearn/Algorithm/Algorithm_SAFC/validfunc_both.py diff --git a/StreamLearn/Algorithm/Algorithm_SAFC/Class_CIFAR10_SAFC_DandSAFC_ID.py b/StreamLearn/Algorithm/Algorithm_SAFC/Class_CIFAR10_SAFC_DandSAFC_ID.py new file mode 100644 index 0000000..0bf55c0 --- /dev/null +++ b/StreamLearn/Algorithm/Algorithm_SAFC/Class_CIFAR10_SAFC_DandSAFC_ID.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- +""" +Created on Sat Sep 14 2024 + +@author: zhangxinyue +""" + +import time + +import numpy as np +import math + +from sklearn.preprocessing import StandardScaler +from sklearn import model_selection +from sklearn.metrics import * + +from sklearn import svm +from sklearn.svm import SVC +from sklearn.svm import LinearSVC + +import scipy.io as io +from scipy.io import loadmat +import os +from joblib import dump,load + +from types import SimpleNamespace + +#Ours +from StreamLearn.Algorithm.Algorithm_SAFC.compared_method import * +from StreamLearn.Algorithm.Algorithm_SAFC.predict import * +from StreamLearn.Algorithm.Algorithm_SAFC.SAFC import * +from StreamLearn.Algorithm.Algorithm_SAFC.MetricsPred import * +from StreamLearn.Algorithm.Algorithm_SAFC.validfunc_both import * +from StreamLearn.Algorithm.Algorithm_SAFC.Read_to_Python import * +from StreamLearn.Algorithm.Algorithm_SAFC.SAFC_Stream_Funcs import * + +class SAFC_achieve(): + def __init__(self,args_address): + # %% 第一阶段、第二阶段、测试main函数 + # 存储地址 + self.save_dir=args_address.save_dir + + # 读取地址 + self.PathSet=args_address.PathSet + self.new_path=args_address.new_path + self.test_path=args_address.test_path + + def stream_fit(self): + # 第一阶段数据读取与训练 + SAFC_Stage1(self.PathSet,self.save_dir) + # 第一阶段模型提取 + print('####eval####') + print("Read stage1 model!") + print('####eval####') + stage1model=load(self.save_dir+'/svm1.model') + print('####eval####') + print("Have read stage1 model!") + print('####eval####') + + # 第二阶段数据读取与训练 + SAFC_Stage2(self.new_path,stage1model,self.save_dir) + # 第二阶段模型提取 + # 变体一 + print('####eval####') + print("Read stage2-SAFC_D model!") + print('####eval####') + stage2model1=load(self.save_dir+'/SAFC_D.model') + print('####eval####') + print("Have read stage2-SAFC_D model!") + print('####eval####') + # 变体二 + print('####eval####') + print("Read stage2-SAFC_D model!") + print('####eval####') + stage2model2=load(self.save_dir+'/SAFC_ID.model') + print('####eval####') + print("Have read stage2-SAFC_D model!") + print('####eval####') + + def stream_evaluate(self): + # 测试数据读取与评估 + SAFC_test(self.test_path,stage2model1,stage2model2) + diff --git a/StreamLearn/Algorithm/Algorithm_SAFC/MetricsPred.py b/StreamLearn/Algorithm/Algorithm_SAFC/MetricsPred.py new file mode 100644 index 0000000..84c6359 --- /dev/null +++ b/StreamLearn/Algorithm/Algorithm_SAFC/MetricsPred.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- +""" +Created on Sat Sep 14 2024 + +@author: zhangxinyue +""" + +import numpy as np +from sklearn.metrics import * + +# %% +def MetricsPred(test_label,test_label_mat,pred,prob): + + acc = np.mean(test_label.flatten()== pred.flatten()) + AUC=roc_auc_score(test_label_mat.tolist(),prob.tolist()) + F1_weight=f1_score(test_label.tolist(),pred.tolist(), average='weighted') + F1_macro=f1_score(test_label.tolist(),pred.tolist(), average='macro') + F1_micro=f1_score(test_label.tolist(),pred.tolist(), average='micro') + return acc,AUC,F1_weight,F1_macro,F1_micro \ No newline at end of file diff --git a/StreamLearn/Algorithm/Algorithm_SAFC/Read_to_Python.py b/StreamLearn/Algorithm/Algorithm_SAFC/Read_to_Python.py new file mode 100644 index 0000000..49ba73d --- /dev/null +++ b/StreamLearn/Algorithm/Algorithm_SAFC/Read_to_Python.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +""" +Created on Sat Sep 14 2024 + +@author: zhangxinyue +""" + +import numpy as np +from scipy.io import loadmat + +# %% +# tips +# import os +# print(f"当前工作目录: {os.getcwd()}") + +# 数据接口 +def readtopython(path): + data_part=loadmat(path) + X_part=data_part["data"].astype(np.float64) + Y_part=data_part["labels"].astype(np.float64) + return X_part,Y_part + +# Batch数据接口 +# Stage 1 +def readbatchtoPython_Past(Path): + for path_index in range(len(Path)): + path=Path[path_index] + X_part,Y_part=readtopython(path) + if path_index==0: + X_past_original=X_part + Y_past_original=Y_part + if path_index!=0: + X_past_original=np.vstack((X_past_original,X_part)) + Y_past_original=np.vstack((Y_past_original,Y_part)) + # 第一阶段的数据没有No.9,即第10类数据 + Position_past=np.where(Y_past_original!=9)[0] + Y_past=Y_past_original[Position_past] + # 特征不含B通道的1024维特征 + X_past=X_past_original[Position_past,0:2048] + return X_past,Y_past + +# Stage 2 +def readbatchtoPython_New(Path): + for path_index in range(len(Path)): + path=Path[path_index] + X_part,Y_part=readtopython(path) + if path_index==0: + X_new_original=X_part + Y_new_original=Y_part + if path_index!=0: + X_new_original=np.vstack((X_new_original,X_part)) + Y_new_original=np.vstack((Y_new_original,Y_part)) + X_new=X_new_original + Y_new=Y_new_original + return X_new,Y_new + + diff --git a/StreamLearn/Algorithm/Algorithm_SAFC/SAFC.py b/StreamLearn/Algorithm/Algorithm_SAFC/SAFC.py new file mode 100644 index 0000000..dbf74f4 --- /dev/null +++ b/StreamLearn/Algorithm/Algorithm_SAFC/SAFC.py @@ -0,0 +1,165 @@ +# -*- coding: utf-8 -*- +""" +Created on Sat Sep 14 2024 + +@author: zhangxinyue +""" + +import numpy as np +import math + +#%% slove Memory Error key +#redefine X_21@D: +def AxdiagB(A,B): + #A=d*N,B=1*N + BL=B.tolist()[0] + C=[] + for i in range(np.shape(A)[0]): + C_row=[] + for j in range(np.shape(A)[1]): + ele=BL[j]*A[i,j] + C_row.append(ele) + C.append(C_row) + C_mat=np.mat(C) + return C_mat +#%% +# 函数1、SAFC_D +def SAFC_D(W1,train_data,train_label,alpha,beta,eta): +##%% Input: +#% W1: c*d1, a model learned in previous stage, and it will be reused in +#% current stage. W1 can be learned by using 'mysoftmax' function. +#% c and d1 are the number of classes and the dimension of data in +#% previous stage, respectively. +#% train_data: d2*n, where d2 is the dimension of data in current stage, d2 > d1. +#% n is the number of data in current stage. +#% train_label: (c+1)*n, each column is a one-hot vector. +#% alpha and beta: the hyperparameters in the model, alpha > 0, beta >0. +#% eta: the step size in the gradient descent algorithm. +##%% Output: +#% softmaxModel: (c+1)*d, the learned classifier. +##%% Our proposed method + cost_old = 0 #%set old cost and new cost value + cost = 1 + object_value = [] #% record the cost at each update iteration + count = 0 #% count the running number + nCla1,nFea1 = np.shape(W1) + nCla2,nSam2 = np.shape(train_label) + nFea2 = np.shape(train_data)[0] + X_21 = train_data[0:nFea1,:] + X_22 = train_data[nFea1:,:] + W2 = 0.005*np.ones((nCla2,nFea2)) #% Initialise classifier W + #%W2= mysoftmax(data_s2,label_s2,alpha_set, eta)[0] + W21 = W2[:,0:nFea1] + W22 = W2[:,nFea1:] + W21_tilde = W21[0:nCla1,:] + W21_hat = W21[nCla1,:] + W22_tilde = W22[0:nCla1,:] + W22_hat = W22[nCla1,:] + loop_max=3000 + while (abs(cost_old - cost) > 10^-6 ) and ( count < loop_max ): + #%( abs(cost_old - cost) > 0.0001*cost ) && + cost_old = cost + count=count+1 + #M = bsxfun(@minus,W2*train_data,max(W2*train_data, [], 1)) + # 计算 W2_train_data 和 col_max + W2_train_data = np.dot(W2, train_data) + col_max = np.max(W2_train_data, axis=0) + M = W2_train_data - col_max + M = np.exp(M) + #p = bsxfun(@rdivide, M, sum(M)); + col_sum = np.sum(M, axis=0) + divisor = np.tile(col_sum, (M.shape[0], 1)) + p= np.divide(M, divisor) + cost = -1/nSam2 * train_label.flatten() @ np.log(p.flatten()).T + alpha * np.sum(np.square(W2))+beta *np.sum(np.square(W21_tilde.flatten()-W1.flatten())) + W21_tilde_grad = -1/nSam2 * (train_label[0:nCla1,:] - p[0:nCla1,:]) @ X_21.T + 2*alpha*W21_tilde + 2*beta * (W21_tilde-W1) + W21_hat_grad = -1/nSam2 * (train_label[nCla1,:] - p[nCla1,:]) @ X_21.T + 2*alpha*W21_hat + W22_grad = -1/nSam2 * (train_label - p) @ X_22.T + 2*alpha * W22 + W21_tilde = W21_tilde - eta*W21_tilde_grad + W21_hat = W21_hat - eta*W21_hat_grad + W22 = W22 - eta*W22_grad + W21=np.vstack(((W21_tilde),W21_hat)) + W2=np.hstack(((W21),W22)) + if object_value==[]: + np.mat(object_value.append(cost)) + else: + object_value=np.vstack(((object_value),cost)) + softmaxModel = W2 + return softmaxModel + +#%% +# 函数2、SAFC_ID函数 +def SAFC_ID(W1, train_data, train_label, alpha,beta, eta): +##%% Input: +#% W1: c*d1, a model learned in previous stage, and it will be reused in +#% current stage. W1 can be learned by using 'mysoftmax' function. +#% c and d1 are the number of classes and the dimension of data in +#% previous stage, respectively. +#% train_data: d2*n, where d2 is the dimension of data in current stage, d2 > d1. +#% n is the number of data in current stage. +#% train_label: (c+1)*n, each column is a one-hot vector. +#% alpha and beta: the hyperparameters in the model, alpha > 0, beta >0. +#% eta: the step size in the gradient descent algorithm. +##%% Output: +#% softmaxModel: (c+1)*d, the learned classifier . +##%% Our proposed method +#%set old cost and new cost value + cost_old = 0 #%set old cost and new cost value + cost = 1 + object_value = [] #% record the cost at each update iteration + count = 0 #% count the running number + nCla1,nFea1 = np.shape(W1) + nCla2,nSam2 = np.shape(train_label) + nFea2 = np.shape(train_data)[0] + X_21 = train_data[0:nFea1,:] + X_22 = train_data[nFea1:,:] + W2 = 0.005*np.ones((nCla2,nFea2)) #% Initialise classifier W + #%W2= mysoftmax(data_s2,label_s2,alpha_set, eta)[0] + W21 = W2[:,0:nFea1] + W22 = W2[:,nFea1:] + W21_tilde = W21[0:nCla1,:] + W21_hat = W21[nCla1,:] + W22_tilde = W22[0:nCla1,:] + W22_hat = W22[nCla1,:] + epsilon = 0.00001 + # % epsilon = 1; + loop_max=3000 + while (abs(cost_old - cost) > 10^-6 ) and ( count < loop_max ): + #%( abs(cost_old - cost) > 0.0001*cost ) && + cost_old = cost + count=count+1 + #M = bsxfun(@minus,W2*train_data,max(W2*train_data, [], 1)) + # 计算 W2_train_data 和 col_max + W2_train_data = np.dot(W2, train_data) + col_max = np.max(W2_train_data, axis=0) + M = W2_train_data - col_max + M = np.exp(M) + #p = bsxfun(@rdivide, M, sum(M)); + col_sum = np.sum(M, axis=0) + divisor = np.tile(col_sum, (M.shape[0], 1)) + p= np.divide(M, divisor) + reg = (W21_tilde@X_21-W1@X_21).T + norm_column = [] + for i in range(nSam2): + norm_column.append(np.linalg.norm(reg[i,:])) + norm_column=np.mat(norm_column) + MM = 1.0/(norm_column+epsilon) + #D = np.diag(MM.tolist()[0])#% D = eye(nSam2)Memory Error + + cost = -1/nSam2 * train_label.flatten() @ np.log(p.flatten()).T + alpha * np.sum(np.square(W2))+beta *np.sum(norm_column[0:]) + W21_tilde_grad = -1/nSam2 * (train_label[0:nCla1,:] - p[0:nCla1,:]) @ X_21.T + 2*alpha*W21_tilde + beta *(AxdiagB(X_21,MM)@(W21_tilde@X_21-W1@X_21).T).T + + W21_hat_grad = -1/nSam2 * (train_label[nCla1,:] - p[nCla1,:]) @ X_21.T + 2*alpha*W21_hat + W22_grad = -1/nSam2 * (train_label - p) @ X_22.T + 2*alpha * W22 + + W21_tilde = W21_tilde - eta*W21_tilde_grad + W21_hat = W21_hat - eta*W21_hat_grad + W22 = W22 - eta*W22_grad + W21=np.vstack(((W21_tilde),W21_hat)) + W2=np.hstack(((W21),W22)) + if object_value==[]: + np.mat(object_value.append(cost)) + else: + object_value=np.vstack(((object_value),cost)) + softmaxModel = W2 + + return softmaxModel \ No newline at end of file diff --git a/StreamLearn/Algorithm/Algorithm_SAFC/SAFC_Stream_Funcs.py b/StreamLearn/Algorithm/Algorithm_SAFC/SAFC_Stream_Funcs.py new file mode 100644 index 0000000..4f7f4b4 --- /dev/null +++ b/StreamLearn/Algorithm/Algorithm_SAFC/SAFC_Stream_Funcs.py @@ -0,0 +1,275 @@ +# -*- coding: utf-8 -*- +""" +Created on Sat Sep 14 2024 + +@author: zhangxinyue +""" + +import time + +import numpy as np +import math + +from sklearn.preprocessing import StandardScaler +from sklearn import model_selection +from sklearn.metrics import * + +from sklearn import svm +from sklearn.svm import SVC +from sklearn.svm import LinearSVC + +import scipy.io as io +from scipy.io import loadmat +import os +from joblib import dump,load + +from types import SimpleNamespace + +#Ours +from StreamLearn.Algorithm.Algorithm_SAFC.compared_method import * +from StreamLearn.Algorithm.Algorithm_SAFC.predict import * +from StreamLearn.Algorithm.Algorithm_SAFC.SAFC import * +from StreamLearn.Algorithm.Algorithm_SAFC.MetricsPred import * +from StreamLearn.Algorithm.Algorithm_SAFC.validfunc_both import * +from StreamLearn.Algorithm.Algorithm_SAFC.Read_to_Python import * + +# %% Adress and Pre-define +# 定义一个稀疏矩阵的方法 +def ind2vec(indices, num_classes): + indices = np.asarray(indices) + out = np.zeros((num_classes, np.shape(indices)[1])) + for i in range(num_classes): + index3 = np.where(i + 1 == indices)[1] + out[i, index3] = 1 + return out + +def datareconsrtuct(X,Y): + #从大到小排列Y + sorted_index=np.argsort(Y, axis=None) + newY=Y[sorted_index] + #相应替换X的位置 + newX=X[sorted_index] + return newX,newY + +# %% 定义第一阶段的读取训练与存储 +def SAFC_Stage1(PathSet,save_dir): + # 第一阶段数据读取与模型训练 + print('####eval####') + print("开始读取第一阶段数据!") + print('####eval####') + + X_past,Y_past=readbatchtoPython_Past(PathSet) + X_past,Y_past=datareconsrtuct(X_past,Y_past) + Y_past=Y_past+1 + + print('####eval####') + print("第一阶段数据读取完成!") + print('####eval####') + + # Given and transform + data_s1=X_past + label_s1_vec=Y_past + label_s1 = ind2vec (Y_past.T, len(np.unique(Y_past))) + + # 归一化 + print('####eval####') + print("第一阶段数据归一化!") + print('####eval####') + scaler = StandardScaler() + data_s1 = scaler.fit_transform(data_s1) + print('####eval####') + print("第一阶段数据归一化完成!") + print('####eval####') + + # 第一阶段数据特征输出 + print('####eval####') + print("第一阶段数据:数据量:{},特征维度:{},类别数:{}".format(np.shape(data_s1)[0], np.shape(data_s1)[1], len(np.unique(Y_past)))) + print('####eval####') + + # 第一阶段模型训练与存储,用SVM训练第一阶段模型 + print('####eval####') + print("begin svm1 training!") + print('####eval####') + svm1 = LinearSVC() + svm1.fit(data_s1.tolist(), label_s1_vec.tolist()) + dump(svm1, save_dir+'/svm1.model') + print('####eval####') + print("end svm1 training!") + print('####eval####') + + return + +# %% 定义第二阶段的读取训练与存储 +def SAFC_Stage2(new_path,svm1,save_dir): + # Paras + eta = 0.1 + alpha_set = [10 ** -2, 10 ** -1, 10 ** 0] + alpha = 0.001 + beta = 0.01 + + # 第二阶段数据读取与模型训练 + print('####eval####') + print("开始读取第二阶段数据!") + print('####eval####') + + X_new,Y_new=readbatchtoPython_New(new_path) + X_new,Y_new=datareconsrtuct(X_new,Y_new) + Y_new=Y_new+1 + + print('####eval####') + print("第二阶段数据读取完成!") + print('####eval####') + + # Given and transform + data_s2=X_new + label_s2_vec=Y_new + label_s2 = ind2vec (Y_new.T, len(np.unique(Y_new))) + + # 归一化 + print('####eval####') + print("第二阶段数据归一化!") + print('####eval####') + scaler = StandardScaler() + data_s2 = scaler.fit_transform(data_s2) + print('####eval####') + print("第二阶段数据归一化完成!") + print('####eval####') + + # 第二阶段数据特征输出 + print('####eval####') + print("第二阶段涉及特征新增与类别新增!") + print('####eval####') + print('####eval####') + print("第二阶段数据:数据量:{},特征维度:{},类别数:{}".format(np.shape(data_s2)[0], np.shape(data_s2)[1], len(np.unique(Y_new)))) + print('####eval####') + + # 第二阶段模型训练与存储 + # 变体一 + alpha_best1, beta_best1 = validfunc(np.transpose(data_s2), label_s2, alpha_set, eta, "SAFC_D",np.mat(svm1.coef_)) + startours1 = time.time() + print('####eval####') + print("begin SAFC_D training!") + print('####eval####') + w_ours1 = SAFC_D(np.mat(svm1.coef_), np.transpose(data_s2), label_s2, alpha_best1, beta_best1, eta) + dump(w_ours1, save_dir+'/SAFC_D.model') + timeours1 = time.time() - startours1 + print('####eval####') + print(timeours1) + print('####eval####') + print('####eval####') + print("end SAFC_D training!") + print('####eval####') + + # 变体二 + alpha_best2,beta_best2=validfunc(np.transpose(data_s2),label_s2,alpha_set,eta,"SAFC_ID",np.mat(svm1.coef_)) + startours2=time.time() + print('####eval####') + print("begin SAFC_ID training!") + print('####eval####') + w_ours2 = SAFC_ID(np.mat(svm1.coef_),np.transpose(data_s2),label_s2,alpha_best2,beta_best2,eta) + dump(w_ours2, save_dir+'/SAFC_ID.model') + timeours2=time.time()-startours2 + print('####eval####') + print(timeours2) + print('####eval####') + print('####eval####') + print("end SAFC_ID training!") + print('####eval####') + + return + +# %% 定义测试数据读取与评估 +def SAFC_test(test_path,w_ours1,w_ours2): + # Metrics + Acc_ours1, Acc_ours2= [], [] + AUC_ours1, AUC_ours2 = [], [] + F1_weight_ours1, F1_weight_ours2 = [], [] + F1_macro_ours1, F1_macro_ours2 = [], [] + F1_micro_ours1, F1_micro_ours2 = [], [] + + # 测试数据读取与评估 + print('####eval####') + print("开始读取测试数据!") + print('####eval####') + + X_test,Y_test=readtopython(test_path) + X_test,Y_test=datareconsrtuct(X_test,Y_test) + Y_test=Y_test+1 + + print('####eval####') + print("测试数据读取完成!") + print('####eval####') + + # Given and transform + test_data=X_test + test_label_vec=Y_test + test_label = ind2vec (Y_test.T, len(np.unique(Y_test))) + + # 归一化 + print('####eval####') + print("测试数据归一化!") + print('####eval####') + scaler = StandardScaler() + test_data = scaler.fit_transform(test_data) + print('####eval####') + print("测试数据归一化完成!") + print('####eval####') + + # 测试数据特征输出 + print('####eval####') + print("测试数据:数据量:{},特征维度:{},类别数:{}".format(np.shape(test_data)[0], np.shape(test_data)[1], len(np.unique(Y_test)))) + print('####eval####') + + # Evaluation + # 变体一 + print('####eval####') + print("begin SAFC_D testing!") + print('####eval####') + pred1, acc_ours1, auc_ours1, f1wei_ours1, f1macro_ours1, f1micro_ours1 = Predict(w_ours1,np.transpose(test_data),test_label) + print('####eval####') + print("begin SAFC_D evaluation!") + print('####eval####') + Acc_ours1.append(acc_ours1) + AUC_ours1.append(auc_ours1) + F1_weight_ours1.append(f1wei_ours1) + F1_macro_ours1.append(f1macro_ours1) + F1_micro_ours1.append(f1micro_ours1) + + # 变体二 + print('####eval####') + print("begin SAFC_ID testing!") + print('####eval####') + pred2,acc_ours2,auc_ours2,f1wei_ours2,f1macro_ours2,f1micro_ours2 = Predict(w_ours2,np.transpose(test_data),test_label) + print('####eval####') + print("begin SAFC_ID evaluation!") + print('####eval####') + Acc_ours2.append(acc_ours2) + AUC_ours2.append(auc_ours2) + F1_weight_ours2.append(f1wei_ours2) + F1_macro_ours2.append(f1macro_ours2) + F1_micro_ours2.append(f1micro_ours2) + + # 结果输出 + print('####eval####') + print("预测与评估完成,输出评估结果!") + print('####eval####') + + meanAcc_ours1 = np.mean(Acc_ours1) + meanAcc_ours2 = np.mean(Acc_ours2) + meanAuc_ours1 = np.mean(AUC_ours1) + meanAuc_ours2 = np.mean(AUC_ours2) + meanF1_macro_ours1 = np.mean(F1_macro_ours1) + meanF1_macro_ours2 = np.mean(F1_macro_ours2) + meanF1_weight_ours1 = np.mean(F1_weight_ours1) + meanF1_weight_ours2 = np.mean(F1_weight_ours2) + meanF1_micro_ours1 = np.mean(F1_micro_ours1) + meanF1_micro_ours2 = np.mean(F1_micro_ours2) + + print('####eval####') + print( + 'meanAcc_ours1{:.4f},meanAcc_ours2{:.4f},meanAuc_ours1{:.4f},meanAuc_ours2{:.4f},meanF1_macro_ours1{:.4f},meanF1_macro_ours2{:.4f},meanF1_weight_ours1{:.4f},meanF1_weight_ours2{:.4f},meanF1_micro_ours1{:.4f},meanF1_micro_ours2{:.4f}' + .format(meanAcc_ours1,meanAcc_ours2, meanAuc_ours1,meanAuc_ours2,meanF1_macro_ours1,meanF1_macro_ours2,meanF1_weight_ours1,meanF1_weight_ours2,meanF1_micro_ours1,meanF1_micro_ours2)) + print('####eval####') + print('Finished!') + + return \ No newline at end of file diff --git a/StreamLearn/Algorithm/Algorithm_SAFC/compared_method.py b/StreamLearn/Algorithm/Algorithm_SAFC/compared_method.py new file mode 100644 index 0000000..613c1c2 --- /dev/null +++ b/StreamLearn/Algorithm/Algorithm_SAFC/compared_method.py @@ -0,0 +1,242 @@ +# -*- coding: utf-8 -*- +""" +Created on Sat Sep 14 2024 + +@author: zhangxinyue +""" + +import numpy as np +import math +from sklearn import model_selection + +from predict import * + +#%% slove Memory Error key +#redefine X_21@D: +def AxdiagB(A,B): + #A=d*N,B=1*N + BL=B.tolist()[0] + C=[] + for i in range(np.shape(A)[0]): + C_row=[] + for j in range(np.shape(A)[1]): + ele=BL[j]*A[i,j] + C_row.append(ele) + C.append(C_row) + C_mat=np.mat(C) + return C_mat + +# %% +# 函数1-1、mysoftmax函数中涉及到的find_best_w函数 +def find_best_w(train_data,train_label, alpha, eta): + [n_Fea,n_Sam] = np.shape(train_data) + nCla = np.shape(train_label)[0] + W = 0.005*np.ones((nCla,n_Fea)) + count=0 #% count the running number + #%set old cost and new cost value + cost_old=0 + cost=1 + object_value=[] #% record the cost at each update iteration + loop_max=3000 + while (abs(cost_old - cost) > 10^-6)and (count < loop_max ): + #%( abs(cost_old - cost) > 0.0001*cost ) && + cost_old = cost + count=count+1 + #M = bsxfun(@minus,W*train_data,max(W*train_data, [], 1)); + # 计算 W_train_data 和 col_max + W_train_data = np.dot(W, train_data) + col_max = np.max(W_train_data, axis=0) + M = W_train_data - col_max + M = np.exp(M) + #p = bsxfun(@rdivide, M, sum(M)); + col_sum = np.sum(M, axis=0) + divisor = np.tile(col_sum, (M.shape[0], 1)) + p= np.divide(M, divisor) + #.flatten()张成向量 + cost = -1/n_Sam * train_label.flatten() @ np.log(p.flatten()).T + alpha * np.sum(np.square(W)) + W_grad = -1/n_Sam * (train_label - p) @ train_data.T + 2*alpha * W + W = W - eta*W_grad + if object_value==[]: + object_value = np.vstack((np.tile(object_value, (cost.shape[1],1)).T, cost)) + else: + object_value=np.vstack(((object_value),cost)) + W1=W + return W1 + +#%% +# 函数1、mysoftmax函数 +def mysoftmax( train_data,train_label,alpha_set,eta): +##%% Input: +#% train_data: d*n, where d is the dimension of data. +#% n is the number of data. +#% train_label: c*n, each column is a one-hot vector. +#% alpha_set: the hyperparameters set, which is used for cross-validation. +#% eta: the step size in the gradient descent algorithm. +##%% Output: +#% softmaxModel: c*d, the learned classifier. +#% alpha_best: the best hyperparameter determined by cross-validation. +##%% cross validation + if len(alpha_set)>1: + alpha_num = len(alpha_set) + n = np.shape(train_data)[1] + k_fold = 5 + #Indices = crossvalind('Kfold', n, k_fold) + kf=model_selection.KFold(n_splits=5,shuffle=False,random_state=None) + acc_statistic = np.zeros((alpha_num,1)) + for train_index, test_index in kf.split(train_data.T): + #print("TRAIN:", train_index, "TEST:", test_index) + x_train, x_test = train_data.T[train_index].T, train_data.T[test_index].T + y_train, y_test = train_label.T[train_index].T, train_label.T[test_index].T + for i in range(1,alpha_num+1): + alpha = alpha_set[i-1] + w_i = find_best_w(x_train,y_train,alpha,eta) + acc_i= Predict(w_i,x_test, y_test)[1] + acc_statistic[i-1]= acc_statistic[i-1]+acc_i + alpha_position = np.argmax(acc_statistic) + alpha_best = alpha_set[alpha_position] + softmaxModel = find_best_w( train_data,train_label, alpha_best, eta) + else: + alpha_best = alpha_set[0] + #由于w_s2当中使用mysoftmax函数时候,传入的是浮点数alpha列表化之后的形式,所以这里要取一下浮点数 + softmaxModel = find_best_w( train_data,train_label, alpha_best, eta) + return softmaxModel,alpha_best + +#%% +# 函数2、对比方法文件夹中的Left1函数 +def Left1(W1,train_data,train_label,alpha,beta,eta): +##%% Input: +#% W1: c*d1, a model learned in previous stage, and it will be reused in +#% current stage. W1 can be learned by using 'mysoftmax' function. +#% c and d1 are the number of classes and the dimension of data in +#% previous stage, respectively. +#% train_data: d2*n, where d2 is the dimension of data in current stage, d2 > d1. +#% n is the number of data in current stage. +#% train_label: (c+1)*n, each column is a one-hot vector. +#% alpha and beta: the hyperparameters in the model, alpha > 0, beta >0. +#% eta: the step size in the gradient descent algorithm. +##%% Output: +#% softmaxModel: (c+1)*d, the learned classifier. +##%% Our proposed method + cost_old = 0 #%set old cost and new cost value + cost = 1 + object_value = [] #% record the cost at each update iteration + count = 0 #% count the running number + nCla1,nFea1 = np.shape(W1) + nCla2,nSam2 = np.shape(train_label) + nFea2 = np.shape(train_data)[0] + X_21 = train_data[0:nFea1,:] + X_22 = train_data[nFea1:,:] + W2 = 0.005*np.ones((nCla2,nFea2)) #% Initialise classifier W + #%W2= mysoftmax(data_s2,label_s2,alpha_set, eta)[0] + W21 = W2[:,0:nFea1] + W22 = W2[:,nFea1:] + W21_tilde = W21[0:nCla1,:] + W21_hat = W21[nCla1,:] + W22_tilde = W22[0:nCla1,:] + W22_hat = W22[nCla1,:] + loop_max=3000 + while (abs(cost_old - cost) > 10^-6 ) and ( count < loop_max ): + #%( abs(cost_old - cost) > 0.0001*cost ) && + cost_old = cost + count=count+1 + #M = bsxfun(@minus,W21*X_21,max(W21*X_21, [], 1)) + # 计算 W2_X_21 和 col_max + W21_X_21= np.dot(W21, X_21) + col_max = np.max(W21_X_21, axis=0) + M = W21_X_21 - col_max + M = np.exp(M) + #p = bsxfun(@rdivide, M, sum(M)); + col_sum = np.sum(M, axis=0) + divisor = np.tile(col_sum, (M.shape[0], 1)) + p= np.divide(M, divisor) + cost = -1/nSam2 * train_label.flatten() @ np.log(p.flatten()).T + alpha * np.sum(np.square(W21.flatten()))+beta *np.sum(np.square(W21_tilde.flatten()-W1.flatten())) + W21_tilde_grad = -1/nSam2 * (train_label[0:nCla1,:] - p[0:nCla1,:]) @ X_21.T + 2*alpha*W21_tilde + 2*beta * (W21_tilde-W1) + W21_hat_grad = -1/nSam2 * (train_label[nCla1,:] - p[nCla1,:]) @ X_21.T + 2*alpha*W21_hat + + W21_tilde = W21_tilde - eta*W21_tilde_grad + W21_hat = W21_hat - eta*W21_hat_grad + + W21=np.vstack(((W21_tilde),W21_hat)) + + if object_value==[]: + object_value = np.vstack((np.tile(object_value, (cost.shape[1],1)).T, cost)) + else: + object_value=np.vstack(((object_value),cost)) + + softmaxModel = W21 + return softmaxModel + +#%% +# 函数3、对比方法文件夹中的Left2函数 +def Left2(W1, train_data, train_label, alpha,beta, eta): +##%% Input: +#% W1: c*d1, a model learned in previous stage, and it will be reused in +#% current stage. W1 can be learned by using 'mysoftmax' function. +#% c and d1 are the number of classes and the dimension of data in +#% previous stage, respectively. +#% train_data: d2*n, where d2 is the dimension of data in current stage, d2 > d1. +#% n is the number of data in current stage. +#% train_label: (c+1)*n, each column is a one-hot vector. +#% alpha and beta: the hyperparameters in the model, alpha > 0, beta >0. +#% eta: the step size in the gradient descent algorithm. +##%% Output: +#% softmaxModel: (c+1)*d, the learned classifier . +##%% Our proposed method +#%set old cost and new cost value + cost_old = 0 #%set old cost and new cost value + cost = 1 + object_value = [] #% record the cost at each update iteration + count = 0 #% count the running number + nCla1,nFea1 = np.shape(W1) + nCla2,nSam2 = np.shape(train_label) + nFea2 = np.shape(train_data)[0] + X_21 = train_data[0:nFea1,:] + X_22 = train_data[nFea1:,:] + W2 = 0.005*np.ones((nCla2,nFea2)) #% Initialise classifier W + #%W2= mysoftmax(data_s2,label_s2,alpha_set, eta)[0] + W21 = W2[:,0:nFea1] + W22 = W2[:,nFea1:] + W21_tilde = W21[0:nCla1,:] + W21_hat = W21[nCla1,:] + W22_tilde = W22[0:nCla1,:] + W22_hat = W22[nCla1,:] + epsilon = 0.00001 + # % epsilon = 1; + loop_max=3000 + while (abs(cost_old - cost) > 10^-6 ) and ( count < loop_max ): + #%( abs(cost_old - cost) > 0.0001*cost ) && + cost_old = cost + count=count+1 + #M = bsxfun(@minus,W21*X_21,max(W21*X_21, [], 1)) + # 计算 W2_X_21 和 col_max + W21_X_21= np.dot(W21, X_21) + col_max = np.max(W21_X_21, axis=0) + M = W21_X_21 - col_max + M = np.exp(M) + #p = bsxfun(@rdivide, M, sum(M)); + col_sum = np.sum(M, axis=0) + divisor = np.tile(col_sum, (M.shape[0], 1)) + p= np.divide(M, divisor) + reg = (W21_tilde@X_21-W1@X_21).T + norm_column = [] + for i in range(nSam2): + norm_column.append(np.linalg.norm(reg[i,:])) + norm_column=np.mat(norm_column) + MM = 1.0/(norm_column+epsilon) + #D = np.diag(MM.tolist()[0])#% D = eye(nSam2); Memory Error + cost = -1/nSam2 * train_label.flatten() @ np.log(p.flatten()).T + alpha * np.sum(np.square(W21.flatten()))+beta *np.sum(norm_column[0:]) + W21_tilde_grad = -1/nSam2 * (train_label[0:nCla1,:] - p[0:nCla1,:]) @ X_21.T + 2*alpha*W21_tilde + beta *(AxdiagB(X_21,MM)@(W21_tilde@X_21-W1@X_21).T).T + W21_hat_grad = -1/nSam2 * (train_label[nCla1,:] - p[nCla1,:]) @ X_21.T + 2*alpha*W21_hat + + W21_tilde = W21_tilde - eta*W21_tilde_grad + W21_hat = W21_hat - eta*W21_hat_grad + + W21=np.vstack(((W21_tilde),W21_hat)) + + if object_value==[]: + object_value = np.vstack((np.tile(object_value, (cost.shape[1],1)).T, cost)) + else: + object_value=np.vstack(((object_value),cost)) + + softmaxModel = W21 + return softmaxModel diff --git a/StreamLearn/Algorithm/Algorithm_SAFC/predict.py b/StreamLearn/Algorithm/Algorithm_SAFC/predict.py new file mode 100644 index 0000000..9b75edf --- /dev/null +++ b/StreamLearn/Algorithm/Algorithm_SAFC/predict.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- +""" +Created on Sat Sep 14 2024 + +@author: zhangxinyue +""" + +import numpy as np +import math +from sklearn.metrics import * + +#%% +# 函数、Predict函数 +def Predict(softmaxModel, test_data,test_label_ori): +##%% Input: +#% softmaxModel: the learned multi-class classifier. +#% test_data: the d*n input data matrix, where each column test_data(:, i) corresponds to +#% a single test set +#% test_label: the c*n input label matrix +#% our code should produce the prediction matrix +##%% Output: +#% pred: a n-dimension vector, where pred(i) is the prediction for the i-th test_data +#% acc: the testing accuracy. + pred = np.zeros((1, np.shape(test_data)[1])) +##%% ---------- SoftmaxPredict -------------------------------------- +#% Compute pred assuming that the labels start from 1. + test_data = test_data[0:np.shape(softmaxModel)[1],:] + #M = bsxfun(@minus,,max(softmaxModel*test_data,[],1)); + softmaxModel_test_data = softmaxModel@test_data + col_max = np.max(softmaxModel_test_data, axis=0) + M = softmaxModel_test_data - col_max + M = np.exp(M) + #predall = bsxfun(@rdivide, M, sum(M)) + col_sum = np.sum(M, axis=0) + divisor = np.tile(col_sum, (M.shape[0], 1)) + predall= np.divide(M, divisor) + pred = np.argmax(predall,axis=0)+1 + probability = np.max(predall,axis=0) + test_label = np.argmax(test_label_ori, axis=0) + 1 + acc = np.mean(test_label.flatten()== pred.flatten()) + AUC=roc_auc_score(test_label_ori.tolist(),predall.tolist()) + F1_weight=f1_score(test_label.tolist(),pred.tolist()[0], average='weighted') + F1_macro=f1_score(test_label.tolist(),pred.tolist()[0], average='macro') + F1_micro=f1_score(test_label.tolist(),pred.tolist()[0], average='micro') + #AUC=1 + #F1_weight=1 + #F1_macro=1 + #F1_micro=1 + return pred,acc,AUC,F1_weight,F1_macro,F1_micro \ No newline at end of file diff --git a/StreamLearn/Algorithm/Algorithm_SAFC/request_import.py b/StreamLearn/Algorithm/Algorithm_SAFC/request_import.py new file mode 100644 index 0000000..5ac7cc9 --- /dev/null +++ b/StreamLearn/Algorithm/Algorithm_SAFC/request_import.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +""" +Created on Sat Sep 14 2024 + +@author: zhangxinyue +""" + +#本实验需要用到的库 +import time + +import numpy as np +import math + +from sklearn.preprocessing import StandardScaler +from sklearn import model_selection +from sklearn.metrics import * + +from sklearn import svm +from sklearn.svm import SVC +from sklearn.svm import LinearSVC + +import scipy.io as io +from scipy.io import loadmat +import os +from joblib import dump,load + +from types import SimpleNamespace + +#Ours +from StreamLearn.Algorithm.Algorithm_SAFC.compared_method import * +from StreamLearn.Algorithm.Algorithm_SAFC.predict import * +from StreamLearn.Algorithm.Algorithm_SAFC.SAFC import * +from StreamLearn.Algorithm.Algorithm_SAFC.MetricsPred import * +from StreamLearn.Algorithm.Algorithm_SAFC.validfunc_both import * +from StreamLearn.Algorithm.Algorithm_SAFC.Read_to_Python import * +from StreamLearn.Algorithm.Algorithm_SAFC.SAFC_Stream_Funcs import * +from StreamLearn.Algorithm.Algorithm_SAFC.Class_CIFAR10_SAFC_DandSAFC_ID import * \ No newline at end of file diff --git a/StreamLearn/Algorithm/Algorithm_SAFC/validfunc_both.py b/StreamLearn/Algorithm/Algorithm_SAFC/validfunc_both.py new file mode 100644 index 0000000..1fe346e --- /dev/null +++ b/StreamLearn/Algorithm/Algorithm_SAFC/validfunc_both.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- +""" +Created on Sat Sep 14 2024 + +@author: zhangxinyue +""" + +import numpy as np +from SAFC import * +from sklearn import model_selection +from sklearn.metrics import * +from predict import * + +#%% +def validfunc(train_data,train_label,alpha_set,eta,method,W1): + n = np.shape(train_data)[1] + k_fold = 5 + kf=model_selection.KFold(n_splits=5,shuffle=False,random_state=None) + both_statistic = np.zeros((len(alpha_set),len(alpha_set))) + for train_index, test_index in kf.split(train_data.T): + x_train, x_test = train_data.T[train_index].T, train_data.T[test_index].T + y_train, y_test = train_label.T[train_index].T, train_label.T[test_index].T + for i in range(1,len(alpha_set)+1): + for j in range(1,len(alpha_set)+1): + alpha = alpha_set[i-1] + beta = alpha_set[j-1] + if method=="SAFC_D": + w_ours1 = SAFC_D(W1,x_train,y_train,alpha,beta,eta) + both_i_j=Predict(w_ours1,x_test, y_test)[1]+Predict(w_ours1,x_test, y_test)[2] + both_statistic[i-1,j-1]= both_statistic[i-1,j-1]+both_i_j + elif method=="SAFC_ID": + w_ours2 = SAFC_ID(W1,x_train,y_train,alpha,beta,eta) + both_i_j=Predict(w_ours2,x_test, y_test)[1]+Predict(w_ours2,x_test, y_test)[2] + both_statistic[i-1,j-1]= both_statistic[i-1,j-1]+both_i_j + else: + print("Error!") + + alpha_position=int(np.argmax(both_statistic)/len(alpha_set)) + beta_position=np.argmax(both_statistic)%len(alpha_set) + alpha_best = alpha_set[alpha_position] + beta_best = alpha_set[beta_position] + return alpha_best,beta_best \ No newline at end of file -- Gitee From ff042a07eb9adb5552f6b99df53c1269b8e4f5e4 Mon Sep 17 00:00:00 2001 From: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> Date: Fri, 22 Nov 2024 16:48:26 +0000 Subject: [PATCH 12/20] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20St?= =?UTF-8?q?reamLearn/Algorithm/Algorithm=5FSAFC/.keep?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- StreamLearn/Algorithm/Algorithm_SAFC/.keep | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 StreamLearn/Algorithm/Algorithm_SAFC/.keep diff --git a/StreamLearn/Algorithm/Algorithm_SAFC/.keep b/StreamLearn/Algorithm/Algorithm_SAFC/.keep deleted file mode 100644 index e69de29..0000000 -- Gitee From 61d2411504702872e456ec8f5a3dc6cc7de005d2 Mon Sep 17 00:00:00 2001 From: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> Date: Fri, 22 Nov 2024 16:48:40 +0000 Subject: [PATCH 13/20] =?UTF-8?q?=E6=96=B0=E5=BB=BA=20Models?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- StreamLearn/Algorithm/Algorithm_SAFC/Models/.keep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 StreamLearn/Algorithm/Algorithm_SAFC/Models/.keep diff --git a/StreamLearn/Algorithm/Algorithm_SAFC/Models/.keep b/StreamLearn/Algorithm/Algorithm_SAFC/Models/.keep new file mode 100644 index 0000000..e69de29 -- Gitee From 18bf35125df169bb9498c65edfdf431be3f80a0d Mon Sep 17 00:00:00 2001 From: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> Date: Fri, 22 Nov 2024 16:49:01 +0000 Subject: [PATCH 14/20] =?UTF-8?q?=E9=87=8D=E5=91=BD=E5=90=8D=20StreamLearn?= =?UTF-8?q?/Algorithm/Algorithm=5FSAFC/Models=20=E4=B8=BA=20StreamLearn/Al?= =?UTF-8?q?gorithm/Algorithm=5FSAFC/ModelsIN?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- StreamLearn/Algorithm/Algorithm_SAFC/{Models => ModelsIN}/.keep | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename StreamLearn/Algorithm/Algorithm_SAFC/{Models => ModelsIN}/.keep (100%) diff --git a/StreamLearn/Algorithm/Algorithm_SAFC/Models/.keep b/StreamLearn/Algorithm/Algorithm_SAFC/ModelsIN/.keep similarity index 100% rename from StreamLearn/Algorithm/Algorithm_SAFC/Models/.keep rename to StreamLearn/Algorithm/Algorithm_SAFC/ModelsIN/.keep -- Gitee From b02ede1381ceb38cb71dcfb9aa4c0ec55525d351 Mon Sep 17 00:00:00 2001 From: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> Date: Fri, 22 Nov 2024 16:49:27 +0000 Subject: [PATCH 15/20] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20St?= =?UTF-8?q?reamLearn/tests/tests=5FSAFC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../CIFAR10_SAFC_DandSAFC_ID_allFunc_main.py | 279 ------------------ .../CIFAR10_SAFC_DandSAFC_ID_useFunc_main.py | 77 ----- .../tests_SAFC/Dataset/dataset_information | 4 - StreamLearn/tests/tests_SAFC/MetricsPred.py | 19 -- StreamLearn/tests/tests_SAFC/ModelsIN/.keep | 0 .../tests/tests_SAFC/Read_to_Python.py | 41 --- StreamLearn/tests/tests_SAFC/SAFC.py | 165 ----------- .../tests/tests_SAFC/SAFC_Stream_Funcs.py | 273 ----------------- .../tests/tests_SAFC/compared_method.py | 242 --------------- StreamLearn/tests/tests_SAFC/predict.py | 49 --- .../tests/tests_SAFC/request_import.py | 33 --- .../tests/tests_SAFC/validfunc_both.py | 42 --- 12 files changed, 1224 deletions(-) delete mode 100644 StreamLearn/tests/tests_SAFC/CIFAR10_SAFC_DandSAFC_ID_allFunc_main.py delete mode 100644 StreamLearn/tests/tests_SAFC/CIFAR10_SAFC_DandSAFC_ID_useFunc_main.py delete mode 100644 StreamLearn/tests/tests_SAFC/Dataset/dataset_information delete mode 100644 StreamLearn/tests/tests_SAFC/MetricsPred.py delete mode 100644 StreamLearn/tests/tests_SAFC/ModelsIN/.keep delete mode 100644 StreamLearn/tests/tests_SAFC/Read_to_Python.py delete mode 100644 StreamLearn/tests/tests_SAFC/SAFC.py delete mode 100644 StreamLearn/tests/tests_SAFC/SAFC_Stream_Funcs.py delete mode 100644 StreamLearn/tests/tests_SAFC/compared_method.py delete mode 100644 StreamLearn/tests/tests_SAFC/predict.py delete mode 100644 StreamLearn/tests/tests_SAFC/request_import.py delete mode 100644 StreamLearn/tests/tests_SAFC/validfunc_both.py diff --git a/StreamLearn/tests/tests_SAFC/CIFAR10_SAFC_DandSAFC_ID_allFunc_main.py b/StreamLearn/tests/tests_SAFC/CIFAR10_SAFC_DandSAFC_ID_allFunc_main.py deleted file mode 100644 index 4129044..0000000 --- a/StreamLearn/tests/tests_SAFC/CIFAR10_SAFC_DandSAFC_ID_allFunc_main.py +++ /dev/null @@ -1,279 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -import time - -import numpy as np -import math - -from sklearn.preprocessing import StandardScaler -from sklearn import model_selection -from sklearn.metrics import * - -from compared_method import * -from predict import * -from SAFC import * -from MetricsPred import * -from validfunc_both import * -from Read_to_Python import * -from SAFC_Stream_Funcs import * - -from sklearn import svm -from sklearn.svm import SVC -from sklearn.svm import LinearSVC - -import scipy.io as io -from scipy.io import loadmat -import os -from joblib import dump,load - -# %% Pre-define -# 定义一个稀疏矩阵的方法 -def ind2vec(indices, num_classes): - indices = np.asarray(indices) - out = np.zeros((num_classes, np.shape(indices)[1])) - for i in range(num_classes): - index3 = np.where(i + 1 == indices)[1] - out[i, index3] = 1 - return out - -def datareconsrtuct(X,Y): - #从大到小排列Y - sorted_index=np.argsort(Y, axis=None) - newY=Y[sorted_index] - #相应替换X的位置 - newX=X[sorted_index] - return newX,newY - -# %% -def SAFC_DandSAFC_ID_streamdata_all(PathSet,new_path,test_path): - # 存储地址 - save_dir="ModelsIN" - - # %%Parameters and given values - # Metrics - Acc_ours1, Acc_ours2= [], [] - AUC_ours1, AUC_ours2 = [], [] - F1_weight_ours1, F1_weight_ours2 = [], [] - F1_macro_ours1, F1_macro_ours2 = [], [] - F1_micro_ours1, F1_micro_ours2 = [], [] - - # Paras - eta = 0.1 - alpha_set = [10 ** -2, 10 ** -1, 10 ** 0] - alpha = 0.001 - beta = 0.01 - - # %% 第一阶段数据读取与模型训练 - print('####eval####') - print("开始读取第一阶段数据!") - print('####eval####') - - X_past,Y_past=readbatchtoPython(PathSet) - # datareconstruction - X_past,Y_past=datareconsrtuct(X_past,Y_past) - Y_past=Y_past+1 - - print('####eval####') - print("第一阶段数据读取完成!") - print('####eval####') - - # Given and transform - data_s1=X_past - label_s1_vec=Y_past - label_s1 = ind2vec (Y_past.T, len(np.unique(Y_past))) - - # 归一化 - print('####eval####') - print("第一阶段数据归一化!") - print('####eval####') - scaler = StandardScaler() - data_s1 = scaler.fit_transform(data_s1) - print('####eval####') - print("第一阶段数据归一化完成!") - print('####eval####') - - # 第一阶段数据特征输出 - print('####eval####') - print("第一阶段数据:数据量:{},特征维度:{},类别数:{}".format(np.shape(data_s1)[0], np.shape(data_s1)[1], len(np.unique(Y_past)))) - print('####eval####') - - # 第一阶段模型训练与存储,用SVM训练第一阶段模型 - print('####eval####') - print("begin svm1 training!") - print('####eval####') - svm1 = LinearSVC() - svm1.fit(data_s1.tolist(), label_s1_vec.tolist()) - dump(svm1, save_dir+'/svm1.model') - print('####eval####') - print("end svm1 training!") - print('####eval####') - - # %% 第二阶段数据读取与模型训练 - print('####eval####') - print("开始读取第二阶段数据!") - print('####eval####') - - X_new,Y_new=readtopython(new_path) - X_new,Y_new=datareconsrtuct(X_new,Y_new) - Y_new=Y_new+1 - - print('####eval####') - print("第二阶段数据读取完成!") - print('####eval####') - - # Given and transform - data_s2=X_new - label_s2_vec=Y_new - label_s2 = ind2vec (Y_new.T, len(np.unique(Y_new))) - - # 归一化 - print('####eval####') - print("第二阶段数据归一化!") - print('####eval####') - scaler = StandardScaler() - data_s2 = scaler.fit_transform(data_s2) - print('####eval####') - print("第二阶段数据归一化完成!") - print('####eval####') - - # 第二阶段数据特征输出 - print('####eval####') - print("第二阶段涉及特征新增与类别新增!") - print('####eval####') - print('####eval####') - print("第二阶段数据:数据量:{},特征维度:{},类别数:{}".format(np.shape(data_s2)[0], np.shape(data_s2)[1], len(np.unique(Y_new)))) - print('####eval####') - - # 第二阶段模型训练与存储 - # 变体一 - alpha_best1, beta_best1 = validfunc(np.transpose(data_s2), label_s2, alpha_set, eta, "SAFC_D",np.mat(svm1.coef_)) - startours1 = time.time() - print('####eval####') - print("begin SAFC_D training!") - print('####eval####') - w_ours1 = SAFC_D(np.mat(svm1.coef_), np.transpose(data_s2), label_s2, alpha_best1, beta_best1, eta) - dump(w_ours1, save_dir+'/SAFC_D.model') - timeours1 = time.time() - startours1 - print('####eval####') - print(timeours1) - print('####eval####') - print('####eval####') - print("end SAFC_D training!") - print('####eval####') - - # 变体二 - alpha_best2,beta_best2=validfunc(np.transpose(data_s2),label_s2,alpha_set,eta,"SAFC_ID",np.mat(svm1.coef_)) - startours2=time.time() - print('####eval####') - print("begin SAFC_ID training!") - print('####eval####') - w_ours2 = SAFC_ID(np.mat(svm1.coef_),np.transpose(data_s2),label_s2,alpha_best2,beta_best2,eta) - dump(w_ours2, save_dir+'/SAFC_ID.model') - timeours2=time.time()-startours2 - print('####eval####') - print(timeours2) - print('####eval####') - print('####eval####') - print("end SAFC_ID training!") - print('####eval####') - - # %%测试数据读取与评估 - print('####eval####') - print("开始读取测试数据!") - print('####eval####') - - X_test,Y_test=readtopython(test_path) - X_test,Y_test=datareconsrtuct(X_test,Y_test) - Y_test=Y_test+1 - - print('####eval####') - print("测试数据读取完成!") - print('####eval####') - - # Given and transform - test_data=X_test - test_label_vec=Y_test - test_label = ind2vec (Y_test.T, len(np.unique(Y_test))) - - # 归一化 - print('####eval####') - print("测试数据归一化!") - print('####eval####') - scaler = StandardScaler() - test_data = scaler.fit_transform(test_data) - print('####eval####') - print("测试数据归一化完成!") - print('####eval####') - - # 测试数据特征输出 - print('####eval####') - print("测试数据:数据量:{},特征维度:{},类别数:{}".format(np.shape(test_data)[0], np.shape(test_data)[1], len(np.unique(Y_test)))) - print('####eval####') - - # Evaluation - # 变体一 - print('####eval####') - print("begin SAFC_D testing!") - print('####eval####') - pred1, acc_ours1, auc_ours1, f1wei_ours1, f1macro_ours1, f1micro_ours1 = Predict(w_ours1,np.transpose(test_data),test_label) - print('####eval####') - print("begin SAFC_D evaluation!") - print('####eval####') - Acc_ours1.append(acc_ours1) - AUC_ours1.append(auc_ours1) - F1_weight_ours1.append(f1wei_ours1) - F1_macro_ours1.append(f1macro_ours1) - F1_micro_ours1.append(f1micro_ours1) - - # 变体二 - print('####eval####') - print("begin SAFC_ID testing!") - print('####eval####') - pred2,acc_ours2,auc_ours2,f1wei_ours2,f1macro_ours2,f1micro_ours2 = Predict(w_ours2,np.transpose(test_data),test_label) - print('####eval####') - print("begin SAFC_ID evaluation!") - print('####eval####') - Acc_ours2.append(acc_ours2) - AUC_ours2.append(auc_ours2) - F1_weight_ours2.append(f1wei_ours2) - F1_macro_ours2.append(f1macro_ours2) - F1_micro_ours2.append(f1micro_ours2) - - # 结果输出 - print('####eval####') - print("预测与评估完成,输出评估结果!") - print('####eval####') - - meanAcc_ours1 = np.mean(Acc_ours1) - meanAcc_ours2 = np.mean(Acc_ours2) - meanAuc_ours1 = np.mean(AUC_ours1) - meanAuc_ours2 = np.mean(AUC_ours2) - meanF1_macro_ours1 = np.mean(F1_macro_ours1) - meanF1_macro_ours2 = np.mean(F1_macro_ours2) - meanF1_weight_ours1 = np.mean(F1_weight_ours1) - meanF1_weight_ours2 = np.mean(F1_weight_ours2) - meanF1_micro_ours1 = np.mean(F1_micro_ours1) - meanF1_micro_ours2 = np.mean(F1_micro_ours2) - - print('####eval####') - print( - 'meanAcc_ours1{:.4f},meanAcc_ours2{:.4f},meanAuc_ours1{:.4f},meanAuc_ours2{:.4f},meanF1_macro_ours1{:.4f},meanF1_macro_ours2{:.4f},meanF1_weight_ours1{:.4f},meanF1_weight_ours2{:.4f},meanF1_micro_ours1{:.4f},meanF1_micro_ours2{:.4f}' - .format(meanAcc_ours1,meanAcc_ours2, meanAuc_ours1,meanAuc_ours2,meanF1_macro_ours1,meanF1_macro_ours2,meanF1_weight_ours1,meanF1_weight_ours2,meanF1_micro_ours1,meanF1_micro_ours2)) - print('####eval####') - print('Finished!') - - return -# %% 主程序调用区 -if __name__ == "__main__": - # 读取地址 - data_batch_dir="Dataset//" - PathSet=[data_batch_dir+"data_batch_1.mat",data_batch_dir+"data_batch_2.mat",data_batch_dir+"data_batch_3.mat",data_batch_dir+"data_batch_4.mat"] - new_path=data_batch_dir+"data_batch_5.mat" - test_path=data_batch_dir+"test_batch.mat" - - SAFC_DandSAFC_ID_streamdata_all(PathSet,new_path,test_path) \ No newline at end of file diff --git a/StreamLearn/tests/tests_SAFC/CIFAR10_SAFC_DandSAFC_ID_useFunc_main.py b/StreamLearn/tests/tests_SAFC/CIFAR10_SAFC_DandSAFC_ID_useFunc_main.py deleted file mode 100644 index 6f2ee54..0000000 --- a/StreamLearn/tests/tests_SAFC/CIFAR10_SAFC_DandSAFC_ID_useFunc_main.py +++ /dev/null @@ -1,77 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -import time - -import numpy as np -import math - -from sklearn.preprocessing import StandardScaler -from sklearn import model_selection -from sklearn.metrics import * - -from compared_method import * -from predict import * -from SAFC import * -from MetricsPred import * -from validfunc_both import * -from Read_to_Python import * -from SAFC_Stream_Funcs import * - -from sklearn import svm -from sklearn.svm import SVC -from sklearn.svm import LinearSVC - -import scipy.io as io -from scipy.io import loadmat -import os -from joblib import dump,load - -# %% 第一阶段、第二阶段、测试main函数 -# 存储地址 -save_dir="ModelsIN" - -# 读取地址 -data_batch_dir="Dataset//" -PathSet=[data_batch_dir+"data_batch_1.mat",data_batch_dir+"data_batch_2.mat",data_batch_dir+"data_batch_3.mat",data_batch_dir+"data_batch_4.mat"] -new_path=data_batch_dir+"data_batch_5.mat" -test_path=data_batch_dir+"test_batch.mat" - -# 第一阶段数据读取与训练 -SAFC_Stage1(PathSet,save_dir) -# 第一阶段模型提取 -print('####eval####') -print("Read stage1 model!") -print('####eval####') -stage1model=load(save_dir+'/svm1.model') -print('####eval####') -print("Have read stage1 model!") -print('####eval####') - -# 第二阶段数据读取与训练 -SAFC_Stage2(new_path,stage1model,save_dir) -# 第二阶段模型提取 -# 变体一 -print('####eval####') -print("Read stage2-SAFC_D model!") -print('####eval####') -stage2model1=load(save_dir+'/SAFC_D.model') -print('####eval####') -print("Have read stage2-SAFC_D model!") -print('####eval####') -# 变体二 -print('####eval####') -print("Read stage2-SAFC_D model!") -print('####eval####') -stage2model2=load(save_dir+'/SAFC_ID.model') -print('####eval####') -print("Have read stage2-SAFC_D model!") -print('####eval####') - -# 测试数据读取与评估 -SAFC_test(test_path,stage2model1,stage2model2) - diff --git a/StreamLearn/tests/tests_SAFC/Dataset/dataset_information b/StreamLearn/tests/tests_SAFC/Dataset/dataset_information deleted file mode 100644 index d04db55..0000000 --- a/StreamLearn/tests/tests_SAFC/Dataset/dataset_information +++ /dev/null @@ -1,4 +0,0 @@ -数据集 -通过百度网盘分享的文件:SAFC_datasets_CIFAR10.zip -链接:https://pan.baidu.com/s/1xtZjSxIIEMnUwoM7VXCzkQ -提取码:nudt \ No newline at end of file diff --git a/StreamLearn/tests/tests_SAFC/MetricsPred.py b/StreamLearn/tests/tests_SAFC/MetricsPred.py deleted file mode 100644 index 84c6359..0000000 --- a/StreamLearn/tests/tests_SAFC/MetricsPred.py +++ /dev/null @@ -1,19 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -import numpy as np -from sklearn.metrics import * - -# %% -def MetricsPred(test_label,test_label_mat,pred,prob): - - acc = np.mean(test_label.flatten()== pred.flatten()) - AUC=roc_auc_score(test_label_mat.tolist(),prob.tolist()) - F1_weight=f1_score(test_label.tolist(),pred.tolist(), average='weighted') - F1_macro=f1_score(test_label.tolist(),pred.tolist(), average='macro') - F1_micro=f1_score(test_label.tolist(),pred.tolist(), average='micro') - return acc,AUC,F1_weight,F1_macro,F1_micro \ No newline at end of file diff --git a/StreamLearn/tests/tests_SAFC/ModelsIN/.keep b/StreamLearn/tests/tests_SAFC/ModelsIN/.keep deleted file mode 100644 index e69de29..0000000 diff --git a/StreamLearn/tests/tests_SAFC/Read_to_Python.py b/StreamLearn/tests/tests_SAFC/Read_to_Python.py deleted file mode 100644 index c8486ba..0000000 --- a/StreamLearn/tests/tests_SAFC/Read_to_Python.py +++ /dev/null @@ -1,41 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -import numpy as np -from scipy.io import loadmat - -# %% -# tips -# import os -# print(f"当前工作目录: {os.getcwd()}") - -# 数据接口 -def readtopython(path): - data_part=loadmat(path) - X_part=data_part["data"].astype(np.float64) - Y_part=data_part["labels"].astype(np.float64) - return X_part,Y_part - -# Batch数据接口 -def readbatchtoPython(Path): - for path_index in range(len(Path)): - path=Path[path_index] - X_part,Y_part=readtopython(path) - if path_index==0: - X_past_original=X_part - Y_past_original=Y_part - if path_index!=0: - X_past_original=np.vstack((X_past_original,X_part)) - Y_past_original=np.vstack((Y_past_original,Y_part)) - # 第一阶段的数据没有No.9,即第10类数据 - Position_past=np.where(Y_past_original!=9)[0] - Y_past=Y_past_original[Position_past] - # 特征不含B通道的1024维特征 - X_past=X_past_original[Position_past,0:2048] - return X_past,Y_past - - diff --git a/StreamLearn/tests/tests_SAFC/SAFC.py b/StreamLearn/tests/tests_SAFC/SAFC.py deleted file mode 100644 index dbf74f4..0000000 --- a/StreamLearn/tests/tests_SAFC/SAFC.py +++ /dev/null @@ -1,165 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -import numpy as np -import math - -#%% slove Memory Error key -#redefine X_21@D: -def AxdiagB(A,B): - #A=d*N,B=1*N - BL=B.tolist()[0] - C=[] - for i in range(np.shape(A)[0]): - C_row=[] - for j in range(np.shape(A)[1]): - ele=BL[j]*A[i,j] - C_row.append(ele) - C.append(C_row) - C_mat=np.mat(C) - return C_mat -#%% -# 函数1、SAFC_D -def SAFC_D(W1,train_data,train_label,alpha,beta,eta): -##%% Input: -#% W1: c*d1, a model learned in previous stage, and it will be reused in -#% current stage. W1 can be learned by using 'mysoftmax' function. -#% c and d1 are the number of classes and the dimension of data in -#% previous stage, respectively. -#% train_data: d2*n, where d2 is the dimension of data in current stage, d2 > d1. -#% n is the number of data in current stage. -#% train_label: (c+1)*n, each column is a one-hot vector. -#% alpha and beta: the hyperparameters in the model, alpha > 0, beta >0. -#% eta: the step size in the gradient descent algorithm. -##%% Output: -#% softmaxModel: (c+1)*d, the learned classifier. -##%% Our proposed method - cost_old = 0 #%set old cost and new cost value - cost = 1 - object_value = [] #% record the cost at each update iteration - count = 0 #% count the running number - nCla1,nFea1 = np.shape(W1) - nCla2,nSam2 = np.shape(train_label) - nFea2 = np.shape(train_data)[0] - X_21 = train_data[0:nFea1,:] - X_22 = train_data[nFea1:,:] - W2 = 0.005*np.ones((nCla2,nFea2)) #% Initialise classifier W - #%W2= mysoftmax(data_s2,label_s2,alpha_set, eta)[0] - W21 = W2[:,0:nFea1] - W22 = W2[:,nFea1:] - W21_tilde = W21[0:nCla1,:] - W21_hat = W21[nCla1,:] - W22_tilde = W22[0:nCla1,:] - W22_hat = W22[nCla1,:] - loop_max=3000 - while (abs(cost_old - cost) > 10^-6 ) and ( count < loop_max ): - #%( abs(cost_old - cost) > 0.0001*cost ) && - cost_old = cost - count=count+1 - #M = bsxfun(@minus,W2*train_data,max(W2*train_data, [], 1)) - # 计算 W2_train_data 和 col_max - W2_train_data = np.dot(W2, train_data) - col_max = np.max(W2_train_data, axis=0) - M = W2_train_data - col_max - M = np.exp(M) - #p = bsxfun(@rdivide, M, sum(M)); - col_sum = np.sum(M, axis=0) - divisor = np.tile(col_sum, (M.shape[0], 1)) - p= np.divide(M, divisor) - cost = -1/nSam2 * train_label.flatten() @ np.log(p.flatten()).T + alpha * np.sum(np.square(W2))+beta *np.sum(np.square(W21_tilde.flatten()-W1.flatten())) - W21_tilde_grad = -1/nSam2 * (train_label[0:nCla1,:] - p[0:nCla1,:]) @ X_21.T + 2*alpha*W21_tilde + 2*beta * (W21_tilde-W1) - W21_hat_grad = -1/nSam2 * (train_label[nCla1,:] - p[nCla1,:]) @ X_21.T + 2*alpha*W21_hat - W22_grad = -1/nSam2 * (train_label - p) @ X_22.T + 2*alpha * W22 - W21_tilde = W21_tilde - eta*W21_tilde_grad - W21_hat = W21_hat - eta*W21_hat_grad - W22 = W22 - eta*W22_grad - W21=np.vstack(((W21_tilde),W21_hat)) - W2=np.hstack(((W21),W22)) - if object_value==[]: - np.mat(object_value.append(cost)) - else: - object_value=np.vstack(((object_value),cost)) - softmaxModel = W2 - return softmaxModel - -#%% -# 函数2、SAFC_ID函数 -def SAFC_ID(W1, train_data, train_label, alpha,beta, eta): -##%% Input: -#% W1: c*d1, a model learned in previous stage, and it will be reused in -#% current stage. W1 can be learned by using 'mysoftmax' function. -#% c and d1 are the number of classes and the dimension of data in -#% previous stage, respectively. -#% train_data: d2*n, where d2 is the dimension of data in current stage, d2 > d1. -#% n is the number of data in current stage. -#% train_label: (c+1)*n, each column is a one-hot vector. -#% alpha and beta: the hyperparameters in the model, alpha > 0, beta >0. -#% eta: the step size in the gradient descent algorithm. -##%% Output: -#% softmaxModel: (c+1)*d, the learned classifier . -##%% Our proposed method -#%set old cost and new cost value - cost_old = 0 #%set old cost and new cost value - cost = 1 - object_value = [] #% record the cost at each update iteration - count = 0 #% count the running number - nCla1,nFea1 = np.shape(W1) - nCla2,nSam2 = np.shape(train_label) - nFea2 = np.shape(train_data)[0] - X_21 = train_data[0:nFea1,:] - X_22 = train_data[nFea1:,:] - W2 = 0.005*np.ones((nCla2,nFea2)) #% Initialise classifier W - #%W2= mysoftmax(data_s2,label_s2,alpha_set, eta)[0] - W21 = W2[:,0:nFea1] - W22 = W2[:,nFea1:] - W21_tilde = W21[0:nCla1,:] - W21_hat = W21[nCla1,:] - W22_tilde = W22[0:nCla1,:] - W22_hat = W22[nCla1,:] - epsilon = 0.00001 - # % epsilon = 1; - loop_max=3000 - while (abs(cost_old - cost) > 10^-6 ) and ( count < loop_max ): - #%( abs(cost_old - cost) > 0.0001*cost ) && - cost_old = cost - count=count+1 - #M = bsxfun(@minus,W2*train_data,max(W2*train_data, [], 1)) - # 计算 W2_train_data 和 col_max - W2_train_data = np.dot(W2, train_data) - col_max = np.max(W2_train_data, axis=0) - M = W2_train_data - col_max - M = np.exp(M) - #p = bsxfun(@rdivide, M, sum(M)); - col_sum = np.sum(M, axis=0) - divisor = np.tile(col_sum, (M.shape[0], 1)) - p= np.divide(M, divisor) - reg = (W21_tilde@X_21-W1@X_21).T - norm_column = [] - for i in range(nSam2): - norm_column.append(np.linalg.norm(reg[i,:])) - norm_column=np.mat(norm_column) - MM = 1.0/(norm_column+epsilon) - #D = np.diag(MM.tolist()[0])#% D = eye(nSam2)Memory Error - - cost = -1/nSam2 * train_label.flatten() @ np.log(p.flatten()).T + alpha * np.sum(np.square(W2))+beta *np.sum(norm_column[0:]) - W21_tilde_grad = -1/nSam2 * (train_label[0:nCla1,:] - p[0:nCla1,:]) @ X_21.T + 2*alpha*W21_tilde + beta *(AxdiagB(X_21,MM)@(W21_tilde@X_21-W1@X_21).T).T - - W21_hat_grad = -1/nSam2 * (train_label[nCla1,:] - p[nCla1,:]) @ X_21.T + 2*alpha*W21_hat - W22_grad = -1/nSam2 * (train_label - p) @ X_22.T + 2*alpha * W22 - - W21_tilde = W21_tilde - eta*W21_tilde_grad - W21_hat = W21_hat - eta*W21_hat_grad - W22 = W22 - eta*W22_grad - W21=np.vstack(((W21_tilde),W21_hat)) - W2=np.hstack(((W21),W22)) - if object_value==[]: - np.mat(object_value.append(cost)) - else: - object_value=np.vstack(((object_value),cost)) - softmaxModel = W2 - - return softmaxModel \ No newline at end of file diff --git a/StreamLearn/tests/tests_SAFC/SAFC_Stream_Funcs.py b/StreamLearn/tests/tests_SAFC/SAFC_Stream_Funcs.py deleted file mode 100644 index 343167d..0000000 --- a/StreamLearn/tests/tests_SAFC/SAFC_Stream_Funcs.py +++ /dev/null @@ -1,273 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -import time - -import numpy as np -import math - -from sklearn.preprocessing import StandardScaler -from sklearn import model_selection -from sklearn.metrics import * - -from compared_method import * -from predict import * -from SAFC import * -from MetricsPred import * -from validfunc_both import * -from Read_to_Python import * -from SAFC_Stream_Funcs import * - -from sklearn import svm -from sklearn.svm import SVC -from sklearn.svm import LinearSVC - -import scipy.io as io -from scipy.io import loadmat -import os -from joblib import dump,load - -# %% Adress and Pre-define -# 定义一个稀疏矩阵的方法 -def ind2vec(indices, num_classes): - indices = np.asarray(indices) - out = np.zeros((num_classes, np.shape(indices)[1])) - for i in range(num_classes): - index3 = np.where(i + 1 == indices)[1] - out[i, index3] = 1 - return out - -def datareconsrtuct(X,Y): - #从大到小排列Y - sorted_index=np.argsort(Y, axis=None) - newY=Y[sorted_index] - #相应替换X的位置 - newX=X[sorted_index] - return newX,newY - -# %% 定义第一阶段的读取训练与存储 -def SAFC_Stage1(PathSet,save_dir): - # 第一阶段数据读取与模型训练 - print('####eval####') - print("开始读取第一阶段数据!") - print('####eval####') - - X_past,Y_past=readbatchtoPython(PathSet) - X_past,Y_past=datareconsrtuct(X_past,Y_past) - Y_past=Y_past+1 - - print('####eval####') - print("第一阶段数据读取完成!") - print('####eval####') - - # Given and transform - data_s1=X_past - label_s1_vec=Y_past - label_s1 = ind2vec (Y_past.T, len(np.unique(Y_past))) - - # 归一化 - print('####eval####') - print("第一阶段数据归一化!") - print('####eval####') - scaler = StandardScaler() - data_s1 = scaler.fit_transform(data_s1) - print('####eval####') - print("第一阶段数据归一化完成!") - print('####eval####') - - # 第一阶段数据特征输出 - print('####eval####') - print("第一阶段数据:数据量:{},特征维度:{},类别数:{}".format(np.shape(data_s1)[0], np.shape(data_s1)[1], len(np.unique(Y_past)))) - print('####eval####') - - # 第一阶段模型训练与存储,用SVM训练第一阶段模型 - print('####eval####') - print("begin svm1 training!") - print('####eval####') - svm1 = LinearSVC() - svm1.fit(data_s1.tolist(), label_s1_vec.tolist()) - dump(svm1, save_dir+'/svm1.model') - print('####eval####') - print("end svm1 training!") - print('####eval####') - - return - -# %% 定义第二阶段的读取训练与存储 -def SAFC_Stage2(new_path,svm1,save_dir): - # Paras - eta = 0.1 - alpha_set = [10 ** -2, 10 ** -1, 10 ** 0] - alpha = 0.001 - beta = 0.01 - - # 第二阶段数据读取与模型训练 - print('####eval####') - print("开始读取第二阶段数据!") - print('####eval####') - - X_new,Y_new=readtopython(new_path) - X_new,Y_new=datareconsrtuct(X_new,Y_new) - Y_new=Y_new+1 - - print('####eval####') - print("第二阶段数据读取完成!") - print('####eval####') - - # Given and transform - data_s2=X_new - label_s2_vec=Y_new - label_s2 = ind2vec (Y_new.T, len(np.unique(Y_new))) - - # 归一化 - print('####eval####') - print("第二阶段数据归一化!") - print('####eval####') - scaler = StandardScaler() - data_s2 = scaler.fit_transform(data_s2) - print('####eval####') - print("第二阶段数据归一化完成!") - print('####eval####') - - # 第二阶段数据特征输出 - print('####eval####') - print("第二阶段涉及特征新增与类别新增!") - print('####eval####') - print('####eval####') - print("第二阶段数据:数据量:{},特征维度:{},类别数:{}".format(np.shape(data_s2)[0], np.shape(data_s2)[1], len(np.unique(Y_new)))) - print('####eval####') - - # 第二阶段模型训练与存储 - # 变体一 - alpha_best1, beta_best1 = validfunc(np.transpose(data_s2), label_s2, alpha_set, eta, "SAFC_D",np.mat(svm1.coef_)) - startours1 = time.time() - print('####eval####') - print("begin SAFC_D training!") - print('####eval####') - w_ours1 = SAFC_D(np.mat(svm1.coef_), np.transpose(data_s2), label_s2, alpha_best1, beta_best1, eta) - dump(w_ours1, save_dir+'/SAFC_D.model') - timeours1 = time.time() - startours1 - print('####eval####') - print(timeours1) - print('####eval####') - print('####eval####') - print("end SAFC_D training!") - print('####eval####') - - # 变体二 - alpha_best2,beta_best2=validfunc(np.transpose(data_s2),label_s2,alpha_set,eta,"SAFC_ID",np.mat(svm1.coef_)) - startours2=time.time() - print('####eval####') - print("begin SAFC_ID training!") - print('####eval####') - w_ours2 = SAFC_ID(np.mat(svm1.coef_),np.transpose(data_s2),label_s2,alpha_best2,beta_best2,eta) - dump(w_ours2, save_dir+'/SAFC_ID.model') - timeours2=time.time()-startours2 - print('####eval####') - print(timeours2) - print('####eval####') - print('####eval####') - print("end SAFC_ID training!") - print('####eval####') - - return - -# %% 定义测试数据读取与评估 -def SAFC_test(test_path,w_ours1,w_ours2): - # Metrics - Acc_ours1, Acc_ours2= [], [] - AUC_ours1, AUC_ours2 = [], [] - F1_weight_ours1, F1_weight_ours2 = [], [] - F1_macro_ours1, F1_macro_ours2 = [], [] - F1_micro_ours1, F1_micro_ours2 = [], [] - - # 测试数据读取与评估 - print('####eval####') - print("开始读取测试数据!") - print('####eval####') - - X_test,Y_test=readtopython(test_path) - X_test,Y_test=datareconsrtuct(X_test,Y_test) - Y_test=Y_test+1 - - print('####eval####') - print("测试数据读取完成!") - print('####eval####') - - # Given and transform - test_data=X_test - test_label_vec=Y_test - test_label = ind2vec (Y_test.T, len(np.unique(Y_test))) - - # 归一化 - print('####eval####') - print("测试数据归一化!") - print('####eval####') - scaler = StandardScaler() - test_data = scaler.fit_transform(test_data) - print('####eval####') - print("测试数据归一化完成!") - print('####eval####') - - # 测试数据特征输出 - print('####eval####') - print("测试数据:数据量:{},特征维度:{},类别数:{}".format(np.shape(test_data)[0], np.shape(test_data)[1], len(np.unique(Y_test)))) - print('####eval####') - - # Evaluation - # 变体一 - print('####eval####') - print("begin SAFC_D testing!") - print('####eval####') - pred1, acc_ours1, auc_ours1, f1wei_ours1, f1macro_ours1, f1micro_ours1 = Predict(w_ours1,np.transpose(test_data),test_label) - print('####eval####') - print("begin SAFC_D evaluation!") - print('####eval####') - Acc_ours1.append(acc_ours1) - AUC_ours1.append(auc_ours1) - F1_weight_ours1.append(f1wei_ours1) - F1_macro_ours1.append(f1macro_ours1) - F1_micro_ours1.append(f1micro_ours1) - - # 变体二 - print('####eval####') - print("begin SAFC_ID testing!") - print('####eval####') - pred2,acc_ours2,auc_ours2,f1wei_ours2,f1macro_ours2,f1micro_ours2 = Predict(w_ours2,np.transpose(test_data),test_label) - print('####eval####') - print("begin SAFC_ID evaluation!") - print('####eval####') - Acc_ours2.append(acc_ours2) - AUC_ours2.append(auc_ours2) - F1_weight_ours2.append(f1wei_ours2) - F1_macro_ours2.append(f1macro_ours2) - F1_micro_ours2.append(f1micro_ours2) - - # 结果输出 - print('####eval####') - print("预测与评估完成,输出评估结果!") - print('####eval####') - - meanAcc_ours1 = np.mean(Acc_ours1) - meanAcc_ours2 = np.mean(Acc_ours2) - meanAuc_ours1 = np.mean(AUC_ours1) - meanAuc_ours2 = np.mean(AUC_ours2) - meanF1_macro_ours1 = np.mean(F1_macro_ours1) - meanF1_macro_ours2 = np.mean(F1_macro_ours2) - meanF1_weight_ours1 = np.mean(F1_weight_ours1) - meanF1_weight_ours2 = np.mean(F1_weight_ours2) - meanF1_micro_ours1 = np.mean(F1_micro_ours1) - meanF1_micro_ours2 = np.mean(F1_micro_ours2) - - print('####eval####') - print( - 'meanAcc_ours1{:.4f},meanAcc_ours2{:.4f},meanAuc_ours1{:.4f},meanAuc_ours2{:.4f},meanF1_macro_ours1{:.4f},meanF1_macro_ours2{:.4f},meanF1_weight_ours1{:.4f},meanF1_weight_ours2{:.4f},meanF1_micro_ours1{:.4f},meanF1_micro_ours2{:.4f}' - .format(meanAcc_ours1,meanAcc_ours2, meanAuc_ours1,meanAuc_ours2,meanF1_macro_ours1,meanF1_macro_ours2,meanF1_weight_ours1,meanF1_weight_ours2,meanF1_micro_ours1,meanF1_micro_ours2)) - print('####eval####') - print('Finished!') - - return \ No newline at end of file diff --git a/StreamLearn/tests/tests_SAFC/compared_method.py b/StreamLearn/tests/tests_SAFC/compared_method.py deleted file mode 100644 index 613c1c2..0000000 --- a/StreamLearn/tests/tests_SAFC/compared_method.py +++ /dev/null @@ -1,242 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -import numpy as np -import math -from sklearn import model_selection - -from predict import * - -#%% slove Memory Error key -#redefine X_21@D: -def AxdiagB(A,B): - #A=d*N,B=1*N - BL=B.tolist()[0] - C=[] - for i in range(np.shape(A)[0]): - C_row=[] - for j in range(np.shape(A)[1]): - ele=BL[j]*A[i,j] - C_row.append(ele) - C.append(C_row) - C_mat=np.mat(C) - return C_mat - -# %% -# 函数1-1、mysoftmax函数中涉及到的find_best_w函数 -def find_best_w(train_data,train_label, alpha, eta): - [n_Fea,n_Sam] = np.shape(train_data) - nCla = np.shape(train_label)[0] - W = 0.005*np.ones((nCla,n_Fea)) - count=0 #% count the running number - #%set old cost and new cost value - cost_old=0 - cost=1 - object_value=[] #% record the cost at each update iteration - loop_max=3000 - while (abs(cost_old - cost) > 10^-6)and (count < loop_max ): - #%( abs(cost_old - cost) > 0.0001*cost ) && - cost_old = cost - count=count+1 - #M = bsxfun(@minus,W*train_data,max(W*train_data, [], 1)); - # 计算 W_train_data 和 col_max - W_train_data = np.dot(W, train_data) - col_max = np.max(W_train_data, axis=0) - M = W_train_data - col_max - M = np.exp(M) - #p = bsxfun(@rdivide, M, sum(M)); - col_sum = np.sum(M, axis=0) - divisor = np.tile(col_sum, (M.shape[0], 1)) - p= np.divide(M, divisor) - #.flatten()张成向量 - cost = -1/n_Sam * train_label.flatten() @ np.log(p.flatten()).T + alpha * np.sum(np.square(W)) - W_grad = -1/n_Sam * (train_label - p) @ train_data.T + 2*alpha * W - W = W - eta*W_grad - if object_value==[]: - object_value = np.vstack((np.tile(object_value, (cost.shape[1],1)).T, cost)) - else: - object_value=np.vstack(((object_value),cost)) - W1=W - return W1 - -#%% -# 函数1、mysoftmax函数 -def mysoftmax( train_data,train_label,alpha_set,eta): -##%% Input: -#% train_data: d*n, where d is the dimension of data. -#% n is the number of data. -#% train_label: c*n, each column is a one-hot vector. -#% alpha_set: the hyperparameters set, which is used for cross-validation. -#% eta: the step size in the gradient descent algorithm. -##%% Output: -#% softmaxModel: c*d, the learned classifier. -#% alpha_best: the best hyperparameter determined by cross-validation. -##%% cross validation - if len(alpha_set)>1: - alpha_num = len(alpha_set) - n = np.shape(train_data)[1] - k_fold = 5 - #Indices = crossvalind('Kfold', n, k_fold) - kf=model_selection.KFold(n_splits=5,shuffle=False,random_state=None) - acc_statistic = np.zeros((alpha_num,1)) - for train_index, test_index in kf.split(train_data.T): - #print("TRAIN:", train_index, "TEST:", test_index) - x_train, x_test = train_data.T[train_index].T, train_data.T[test_index].T - y_train, y_test = train_label.T[train_index].T, train_label.T[test_index].T - for i in range(1,alpha_num+1): - alpha = alpha_set[i-1] - w_i = find_best_w(x_train,y_train,alpha,eta) - acc_i= Predict(w_i,x_test, y_test)[1] - acc_statistic[i-1]= acc_statistic[i-1]+acc_i - alpha_position = np.argmax(acc_statistic) - alpha_best = alpha_set[alpha_position] - softmaxModel = find_best_w( train_data,train_label, alpha_best, eta) - else: - alpha_best = alpha_set[0] - #由于w_s2当中使用mysoftmax函数时候,传入的是浮点数alpha列表化之后的形式,所以这里要取一下浮点数 - softmaxModel = find_best_w( train_data,train_label, alpha_best, eta) - return softmaxModel,alpha_best - -#%% -# 函数2、对比方法文件夹中的Left1函数 -def Left1(W1,train_data,train_label,alpha,beta,eta): -##%% Input: -#% W1: c*d1, a model learned in previous stage, and it will be reused in -#% current stage. W1 can be learned by using 'mysoftmax' function. -#% c and d1 are the number of classes and the dimension of data in -#% previous stage, respectively. -#% train_data: d2*n, where d2 is the dimension of data in current stage, d2 > d1. -#% n is the number of data in current stage. -#% train_label: (c+1)*n, each column is a one-hot vector. -#% alpha and beta: the hyperparameters in the model, alpha > 0, beta >0. -#% eta: the step size in the gradient descent algorithm. -##%% Output: -#% softmaxModel: (c+1)*d, the learned classifier. -##%% Our proposed method - cost_old = 0 #%set old cost and new cost value - cost = 1 - object_value = [] #% record the cost at each update iteration - count = 0 #% count the running number - nCla1,nFea1 = np.shape(W1) - nCla2,nSam2 = np.shape(train_label) - nFea2 = np.shape(train_data)[0] - X_21 = train_data[0:nFea1,:] - X_22 = train_data[nFea1:,:] - W2 = 0.005*np.ones((nCla2,nFea2)) #% Initialise classifier W - #%W2= mysoftmax(data_s2,label_s2,alpha_set, eta)[0] - W21 = W2[:,0:nFea1] - W22 = W2[:,nFea1:] - W21_tilde = W21[0:nCla1,:] - W21_hat = W21[nCla1,:] - W22_tilde = W22[0:nCla1,:] - W22_hat = W22[nCla1,:] - loop_max=3000 - while (abs(cost_old - cost) > 10^-6 ) and ( count < loop_max ): - #%( abs(cost_old - cost) > 0.0001*cost ) && - cost_old = cost - count=count+1 - #M = bsxfun(@minus,W21*X_21,max(W21*X_21, [], 1)) - # 计算 W2_X_21 和 col_max - W21_X_21= np.dot(W21, X_21) - col_max = np.max(W21_X_21, axis=0) - M = W21_X_21 - col_max - M = np.exp(M) - #p = bsxfun(@rdivide, M, sum(M)); - col_sum = np.sum(M, axis=0) - divisor = np.tile(col_sum, (M.shape[0], 1)) - p= np.divide(M, divisor) - cost = -1/nSam2 * train_label.flatten() @ np.log(p.flatten()).T + alpha * np.sum(np.square(W21.flatten()))+beta *np.sum(np.square(W21_tilde.flatten()-W1.flatten())) - W21_tilde_grad = -1/nSam2 * (train_label[0:nCla1,:] - p[0:nCla1,:]) @ X_21.T + 2*alpha*W21_tilde + 2*beta * (W21_tilde-W1) - W21_hat_grad = -1/nSam2 * (train_label[nCla1,:] - p[nCla1,:]) @ X_21.T + 2*alpha*W21_hat - - W21_tilde = W21_tilde - eta*W21_tilde_grad - W21_hat = W21_hat - eta*W21_hat_grad - - W21=np.vstack(((W21_tilde),W21_hat)) - - if object_value==[]: - object_value = np.vstack((np.tile(object_value, (cost.shape[1],1)).T, cost)) - else: - object_value=np.vstack(((object_value),cost)) - - softmaxModel = W21 - return softmaxModel - -#%% -# 函数3、对比方法文件夹中的Left2函数 -def Left2(W1, train_data, train_label, alpha,beta, eta): -##%% Input: -#% W1: c*d1, a model learned in previous stage, and it will be reused in -#% current stage. W1 can be learned by using 'mysoftmax' function. -#% c and d1 are the number of classes and the dimension of data in -#% previous stage, respectively. -#% train_data: d2*n, where d2 is the dimension of data in current stage, d2 > d1. -#% n is the number of data in current stage. -#% train_label: (c+1)*n, each column is a one-hot vector. -#% alpha and beta: the hyperparameters in the model, alpha > 0, beta >0. -#% eta: the step size in the gradient descent algorithm. -##%% Output: -#% softmaxModel: (c+1)*d, the learned classifier . -##%% Our proposed method -#%set old cost and new cost value - cost_old = 0 #%set old cost and new cost value - cost = 1 - object_value = [] #% record the cost at each update iteration - count = 0 #% count the running number - nCla1,nFea1 = np.shape(W1) - nCla2,nSam2 = np.shape(train_label) - nFea2 = np.shape(train_data)[0] - X_21 = train_data[0:nFea1,:] - X_22 = train_data[nFea1:,:] - W2 = 0.005*np.ones((nCla2,nFea2)) #% Initialise classifier W - #%W2= mysoftmax(data_s2,label_s2,alpha_set, eta)[0] - W21 = W2[:,0:nFea1] - W22 = W2[:,nFea1:] - W21_tilde = W21[0:nCla1,:] - W21_hat = W21[nCla1,:] - W22_tilde = W22[0:nCla1,:] - W22_hat = W22[nCla1,:] - epsilon = 0.00001 - # % epsilon = 1; - loop_max=3000 - while (abs(cost_old - cost) > 10^-6 ) and ( count < loop_max ): - #%( abs(cost_old - cost) > 0.0001*cost ) && - cost_old = cost - count=count+1 - #M = bsxfun(@minus,W21*X_21,max(W21*X_21, [], 1)) - # 计算 W2_X_21 和 col_max - W21_X_21= np.dot(W21, X_21) - col_max = np.max(W21_X_21, axis=0) - M = W21_X_21 - col_max - M = np.exp(M) - #p = bsxfun(@rdivide, M, sum(M)); - col_sum = np.sum(M, axis=0) - divisor = np.tile(col_sum, (M.shape[0], 1)) - p= np.divide(M, divisor) - reg = (W21_tilde@X_21-W1@X_21).T - norm_column = [] - for i in range(nSam2): - norm_column.append(np.linalg.norm(reg[i,:])) - norm_column=np.mat(norm_column) - MM = 1.0/(norm_column+epsilon) - #D = np.diag(MM.tolist()[0])#% D = eye(nSam2); Memory Error - cost = -1/nSam2 * train_label.flatten() @ np.log(p.flatten()).T + alpha * np.sum(np.square(W21.flatten()))+beta *np.sum(norm_column[0:]) - W21_tilde_grad = -1/nSam2 * (train_label[0:nCla1,:] - p[0:nCla1,:]) @ X_21.T + 2*alpha*W21_tilde + beta *(AxdiagB(X_21,MM)@(W21_tilde@X_21-W1@X_21).T).T - W21_hat_grad = -1/nSam2 * (train_label[nCla1,:] - p[nCla1,:]) @ X_21.T + 2*alpha*W21_hat - - W21_tilde = W21_tilde - eta*W21_tilde_grad - W21_hat = W21_hat - eta*W21_hat_grad - - W21=np.vstack(((W21_tilde),W21_hat)) - - if object_value==[]: - object_value = np.vstack((np.tile(object_value, (cost.shape[1],1)).T, cost)) - else: - object_value=np.vstack(((object_value),cost)) - - softmaxModel = W21 - return softmaxModel diff --git a/StreamLearn/tests/tests_SAFC/predict.py b/StreamLearn/tests/tests_SAFC/predict.py deleted file mode 100644 index 9b75edf..0000000 --- a/StreamLearn/tests/tests_SAFC/predict.py +++ /dev/null @@ -1,49 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -import numpy as np -import math -from sklearn.metrics import * - -#%% -# 函数、Predict函数 -def Predict(softmaxModel, test_data,test_label_ori): -##%% Input: -#% softmaxModel: the learned multi-class classifier. -#% test_data: the d*n input data matrix, where each column test_data(:, i) corresponds to -#% a single test set -#% test_label: the c*n input label matrix -#% our code should produce the prediction matrix -##%% Output: -#% pred: a n-dimension vector, where pred(i) is the prediction for the i-th test_data -#% acc: the testing accuracy. - pred = np.zeros((1, np.shape(test_data)[1])) -##%% ---------- SoftmaxPredict -------------------------------------- -#% Compute pred assuming that the labels start from 1. - test_data = test_data[0:np.shape(softmaxModel)[1],:] - #M = bsxfun(@minus,,max(softmaxModel*test_data,[],1)); - softmaxModel_test_data = softmaxModel@test_data - col_max = np.max(softmaxModel_test_data, axis=0) - M = softmaxModel_test_data - col_max - M = np.exp(M) - #predall = bsxfun(@rdivide, M, sum(M)) - col_sum = np.sum(M, axis=0) - divisor = np.tile(col_sum, (M.shape[0], 1)) - predall= np.divide(M, divisor) - pred = np.argmax(predall,axis=0)+1 - probability = np.max(predall,axis=0) - test_label = np.argmax(test_label_ori, axis=0) + 1 - acc = np.mean(test_label.flatten()== pred.flatten()) - AUC=roc_auc_score(test_label_ori.tolist(),predall.tolist()) - F1_weight=f1_score(test_label.tolist(),pred.tolist()[0], average='weighted') - F1_macro=f1_score(test_label.tolist(),pred.tolist()[0], average='macro') - F1_micro=f1_score(test_label.tolist(),pred.tolist()[0], average='micro') - #AUC=1 - #F1_weight=1 - #F1_macro=1 - #F1_micro=1 - return pred,acc,AUC,F1_weight,F1_macro,F1_micro \ No newline at end of file diff --git a/StreamLearn/tests/tests_SAFC/request_import.py b/StreamLearn/tests/tests_SAFC/request_import.py deleted file mode 100644 index 071fdba..0000000 --- a/StreamLearn/tests/tests_SAFC/request_import.py +++ /dev/null @@ -1,33 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -#本实验需要用到的库 -import time - -import numpy as np -import math - -from sklearn.preprocessing import StandardScaler -from sklearn import model_selection -from sklearn.metrics import * - -from compared_method import * -from predict import * -from SAFC import * -from MetricsPred import * -from validfunc_both import * -from Read_to_Python import * -from SAFC_Stream_Funcs import * - -from sklearn import svm -from sklearn.svm import SVC -from sklearn.svm import LinearSVC - -import scipy.io as io -from scipy.io import loadmat -import os -from joblib import dump,load \ No newline at end of file diff --git a/StreamLearn/tests/tests_SAFC/validfunc_both.py b/StreamLearn/tests/tests_SAFC/validfunc_both.py deleted file mode 100644 index 1fe346e..0000000 --- a/StreamLearn/tests/tests_SAFC/validfunc_both.py +++ /dev/null @@ -1,42 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Sat Sep 14 2024 - -@author: zhangxinyue -""" - -import numpy as np -from SAFC import * -from sklearn import model_selection -from sklearn.metrics import * -from predict import * - -#%% -def validfunc(train_data,train_label,alpha_set,eta,method,W1): - n = np.shape(train_data)[1] - k_fold = 5 - kf=model_selection.KFold(n_splits=5,shuffle=False,random_state=None) - both_statistic = np.zeros((len(alpha_set),len(alpha_set))) - for train_index, test_index in kf.split(train_data.T): - x_train, x_test = train_data.T[train_index].T, train_data.T[test_index].T - y_train, y_test = train_label.T[train_index].T, train_label.T[test_index].T - for i in range(1,len(alpha_set)+1): - for j in range(1,len(alpha_set)+1): - alpha = alpha_set[i-1] - beta = alpha_set[j-1] - if method=="SAFC_D": - w_ours1 = SAFC_D(W1,x_train,y_train,alpha,beta,eta) - both_i_j=Predict(w_ours1,x_test, y_test)[1]+Predict(w_ours1,x_test, y_test)[2] - both_statistic[i-1,j-1]= both_statistic[i-1,j-1]+both_i_j - elif method=="SAFC_ID": - w_ours2 = SAFC_ID(W1,x_train,y_train,alpha,beta,eta) - both_i_j=Predict(w_ours2,x_test, y_test)[1]+Predict(w_ours2,x_test, y_test)[2] - both_statistic[i-1,j-1]= both_statistic[i-1,j-1]+both_i_j - else: - print("Error!") - - alpha_position=int(np.argmax(both_statistic)/len(alpha_set)) - beta_position=np.argmax(both_statistic)%len(alpha_set) - alpha_best = alpha_set[alpha_position] - beta_best = alpha_set[beta_position] - return alpha_best,beta_best \ No newline at end of file -- Gitee From 575892fc70dcffd6a26a8b8b62e650edd36f1588 Mon Sep 17 00:00:00 2001 From: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> Date: Fri, 22 Nov 2024 16:49:49 +0000 Subject: [PATCH 16/20] test_update Signed-off-by: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> --- StreamLearn/tests/test_SAFC.py | 60 ++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 StreamLearn/tests/test_SAFC.py diff --git a/StreamLearn/tests/test_SAFC.py b/StreamLearn/tests/test_SAFC.py new file mode 100644 index 0000000..d93dfe4 --- /dev/null +++ b/StreamLearn/tests/test_SAFC.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- +""" +Created on Fri Nov 22 20:02:07 2024 + +@author: zhangxinyue +""" + +import time + +import numpy as np +import math + +from sklearn.preprocessing import StandardScaler +from sklearn import model_selection +from sklearn.metrics import * + +from sklearn import svm +from sklearn.svm import SVC +from sklearn.svm import LinearSVC + +import scipy.io as io +from scipy.io import loadmat +import os +from joblib import dump,load + +from types import SimpleNamespace + +#Ours +from StreamLearn.Algorithm.Algorithm_SAFC.compared_method import * +from StreamLearn.Algorithm.Algorithm_SAFC.predict import * +from StreamLearn.Algorithm.Algorithm_SAFC.SAFC import * +from StreamLearn.Algorithm.Algorithm_SAFC.MetricsPred import * +from StreamLearn.Algorithm.Algorithm_SAFC.validfunc_both import * +from StreamLearn.Algorithm.Algorithm_SAFC.Read_to_Python import * +from StreamLearn.Algorithm.Algorithm_SAFC.SAFC_Stream_Funcs import * +from StreamLearn.Algorithm.Algorithm_SAFC.Class_CIFAR10_SAFC_DandSAFC_ID import * + +#%% +def train_and_evaluate_stream_SAFC(args_address,runT): + alg_SAFC=SAFC_achieve(args_address) + for i in range(runT): + alg_SAFC.stream_fit() + alg_SAFC.stream_stream_evaluate() + return + +def main(): + # 地址参数定义 + data_batch_dir="Dataset//" + args_address=SimpleNamespace( + save_dir="ModelsIN", + PathSet=[data_batch_dir+"data_batch_1.mat",data_batch_dir+"data_batch_2.mat",data_batch_dir+"data_batch_3.mat",data_batch_dir+"data_batch_4.mat"], + new_path=[data_batch_dir+"data_batch_5.mat"], + test_path=data_batch_dir+"test_batch.mat") + # 运行次数定义 + runT=3 + # 运行 + train_and_evaluate_stream_SAFC(args_address,runT) + +if __name__ == '__main__': + main() \ No newline at end of file -- Gitee From f7a0afda560d62bba5015d59b8a4aeaf22b2828f Mon Sep 17 00:00:00 2001 From: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> Date: Fri, 22 Nov 2024 17:01:15 +0000 Subject: [PATCH 17/20] update README.md. Signed-off-by: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index b82970c..51d2a14 100644 --- a/README.md +++ b/README.md @@ -354,15 +354,15 @@ python StreamLearn/tests/test_ODS.py --data PATH_TO_DATA --checkpoint PATH_TO_CH 流式数据读取,传入数据地址: ```python -def __init__(self,args_address): - # 第一阶段、第二阶段、测试main函数 - # 存储地址 - self.save_dir=args_address.save_dir - - # 读取地址 - self.PathSet=args_address.PathSet - self.new_path=args_address.new_path - self.test_path=args_address.test_path +class SAFC_achieve(): + def __init__(self,args_address): + # 存储地址 + self.save_dir=args_address.save_dir + + # 读取地址 + self.PathSet=args_address.PathSet + self.new_path=args_address.new_path + self.test_path=args_address.test_path ``` -- Gitee From 46634015357209ee6d785353931b972ed4d3e106 Mon Sep 17 00:00:00 2001 From: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> Date: Fri, 22 Nov 2024 17:16:32 +0000 Subject: [PATCH 18/20] update README.md. Signed-off-by: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> --- README.md | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 51d2a14..b6b6bce 100644 --- a/README.md +++ b/README.md @@ -354,15 +354,14 @@ python StreamLearn/tests/test_ODS.py --data PATH_TO_DATA --checkpoint PATH_TO_CH 流式数据读取,传入数据地址: ```python -class SAFC_achieve(): - def __init__(self,args_address): - # 存储地址 - self.save_dir=args_address.save_dir +def __init__(self,args_address): + # 存储地址 + self.save_dir=args_address.save_dir - # 读取地址 - self.PathSet=args_address.PathSet - self.new_path=args_address.new_path - self.test_path=args_address.test_path + # 读取地址 + self.PathSet=args_address.PathSet + self.new_path=args_address.new_path + self.test_path=args_address.test_path ``` -- Gitee From 944d7bc26de61d7b43c8ac0f30085c41fc1ef473 Mon Sep 17 00:00:00 2001 From: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> Date: Sat, 23 Nov 2024 08:17:04 +0000 Subject: [PATCH 19/20] update README.md. Signed-off-by: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index b6b6bce..7ce63eb 100644 --- a/README.md +++ b/README.md @@ -641,6 +641,7 @@ def SAFC_test(test_path,w_ours1,w_ours2): ModelsIN用于存放训练获得的分类器 + 测试主文件为test_SAFC.py -- Gitee From c75d739192e047d3e5f00e346cf7d1a23a42fa1f Mon Sep 17 00:00:00 2001 From: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> Date: Sat, 23 Nov 2024 08:17:35 +0000 Subject: [PATCH 20/20] update StreamLearn/tests/test_SAFC.py. Signed-off-by: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> --- StreamLearn/tests/test_SAFC.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/StreamLearn/tests/test_SAFC.py b/StreamLearn/tests/test_SAFC.py index d93dfe4..916ff87 100644 --- a/StreamLearn/tests/test_SAFC.py +++ b/StreamLearn/tests/test_SAFC.py @@ -40,7 +40,7 @@ def train_and_evaluate_stream_SAFC(args_address,runT): alg_SAFC=SAFC_achieve(args_address) for i in range(runT): alg_SAFC.stream_fit() - alg_SAFC.stream_stream_evaluate() + alg_SAFC.stream_evaluate() return def main(): -- Gitee