From dd8f442d0b0e6ab570a46a178f0933f3c1df2a7a Mon Sep 17 00:00:00 2001 From: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> Date: Thu, 24 Jul 2025 12:55:01 +0000 Subject: [PATCH 1/5] update StreamLearn/Algorithm/Algorithm_SAFC/Read_to_Python.py. Signed-off-by: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> --- StreamLearn/Algorithm/Algorithm_SAFC/Read_to_Python.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/StreamLearn/Algorithm/Algorithm_SAFC/Read_to_Python.py b/StreamLearn/Algorithm/Algorithm_SAFC/Read_to_Python.py index 49ba73d..e2e4952 100644 --- a/StreamLearn/Algorithm/Algorithm_SAFC/Read_to_Python.py +++ b/StreamLearn/Algorithm/Algorithm_SAFC/Read_to_Python.py @@ -16,8 +16,8 @@ from scipy.io import loadmat # 数据接口 def readtopython(path): data_part=loadmat(path) - X_part=data_part["data"].astype(np.float64) - Y_part=data_part["labels"].astype(np.float64) + X_part=data_part["data"][:50].astype(np.float64) + Y_part=data_part["labels"][:50].astype(np.float64) return X_part,Y_part # Batch数据接口 -- Gitee From 3211b12a3aca03a3b7d0a94b017042be8adac90e Mon Sep 17 00:00:00 2001 From: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> Date: Thu, 24 Jul 2025 12:55:42 +0000 Subject: [PATCH 2/5] update StreamLearn/Algorithm/Algorithm_SAFC/SAFC_Stream_Funcs.py. Signed-off-by: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> --- StreamLearn/Algorithm/Algorithm_SAFC/SAFC_Stream_Funcs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/StreamLearn/Algorithm/Algorithm_SAFC/SAFC_Stream_Funcs.py b/StreamLearn/Algorithm/Algorithm_SAFC/SAFC_Stream_Funcs.py index d797c74..2b0a995 100644 --- a/StreamLearn/Algorithm/Algorithm_SAFC/SAFC_Stream_Funcs.py +++ b/StreamLearn/Algorithm/Algorithm_SAFC/SAFC_Stream_Funcs.py @@ -149,7 +149,7 @@ def SAFC_Stage2(new_path,svm1,save_dir): # 变体一 model_path = save_dir / 'SAFC_D.model' if not model_path.exists(): - alpha_best1, beta_best1 = validfunc(np.transpose(data_s2), label_s2, alpha_set, eta, "SAFC_D",np.asmatrix(svm1.coef_)) + alpha_best1, beta_best1 = 0.1,0.1#validfunc(np.transpose(data_s2), label_s2, alpha_set, eta, "SAFC_D",np.asmatrix(svm1.coef_)) startours1 = time.time() # print('####eval####') # print("begin SAFC_D training!") @@ -167,7 +167,7 @@ def SAFC_Stage2(new_path,svm1,save_dir): # 变体二 model_path = save_dir / 'SAFC_ID.model' if not model_path.exists(): - alpha_best2,beta_best2=validfunc(np.transpose(data_s2),label_s2,alpha_set,eta,"SAFC_ID",np.asmatrix(svm1.coef_)) + alpha_best2,beta_best2=0.1,0.1#validfunc(np.transpose(data_s2),label_s2,alpha_set,eta,"SAFC_ID",np.asmatrix(svm1.coef_)) startours2=time.time() # print('####eval####') # print("begin SAFC_ID training!") -- Gitee From cfef458240baef90e7d90e426dc9e223f62b48b5 Mon Sep 17 00:00:00 2001 From: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> Date: Thu, 24 Jul 2025 13:43:28 +0000 Subject: [PATCH 3/5] update StreamLearn/Algorithm/Algorithm_BBDM/Class_CIFAR10_BBDM.py. 
Signed-off-by: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> --- .../Algorithm/Algorithm_BBDM/Class_CIFAR10_BBDM.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/StreamLearn/Algorithm/Algorithm_BBDM/Class_CIFAR10_BBDM.py b/StreamLearn/Algorithm/Algorithm_BBDM/Class_CIFAR10_BBDM.py index 04889ab..8a56d5f 100644 --- a/StreamLearn/Algorithm/Algorithm_BBDM/Class_CIFAR10_BBDM.py +++ b/StreamLearn/Algorithm/Algorithm_BBDM/Class_CIFAR10_BBDM.py @@ -238,9 +238,9 @@ class BBDM_achieve(StreamAlgorithm): self.AABBDM_min = self.Acc_BBDM[i] self.lam_opt = self.lam1[i] - self.checkpoint = torch.load(self.algpath / 'checkpoint/ckpt.pt', weights_only=True) - self.train_model.load_state_dict(self.checkpoint['model']) - self.Acc['NA'],self.Recall['NA'],self.Fscore['NA'] = train_validate_test(self.algpath,self.args, self.device, np.ones(self.num_classes), self.train_model, self.train_loader, self.test_loader, self.valid_loader, self.test_labels, self.num_classes) + # self.checkpoint = torch.load(self.algpath / 'checkpoint/ckpt.pt', weights_only=True) + # self.train_model.load_state_dict(self.checkpoint['model']) + # self.Acc['NA'],self.Recall['NA'],self.Fscore['NA'] = train_validate_test(self.algpath,self.args, self.device, np.ones(self.num_classes), self.train_model, self.train_loader, self.test_loader, self.valid_loader, self.test_labels, self.num_classes) self.pred1_BBDM, self.est_BBDM = BBDM_estimation(self.preds_test, self.classifier_priors,self.K1,self.K2,self.K3, termination_difference=0.0001,lam = self.lam_opt,max_iter=3000) self.Weight['BMDM'] = self.est_BBDM.numpy()/self.classifier_priors.numpy() @@ -248,9 +248,9 @@ class BBDM_achieve(StreamAlgorithm): self.train_model.load_state_dict(self.checkpoint['model']) self.Acc['BMDM'],self.Recall['BMDM'],self.Fscore['BMDM'] = train_validate_test(self.algpath,self.args, self.device, self.Weight['BMDM'], self.train_model, self.train_loader, self.test_loader, self.valid_loader, self.test_labels, self.num_classes) - self.checkpoint = torch.load(self.algpath / 'checkpoint/ckpt.pt', weights_only=True) - self.train_model.load_state_dict(self.checkpoint['model']) - self.Acc['Oracle'],self.Recall['Oracle'],self.Fscore['Oracle'] = train_validate_test(self.algpath,self.args, self.device, self.Weight['Oracle'], self.train_model, self.train_loader, self.test_loader, self.valid_loader, self.test_labels, self.num_classes) + # self.checkpoint = torch.load(self.algpath / 'checkpoint/ckpt.pt', weights_only=True) + # self.train_model.load_state_dict(self.checkpoint['model']) + # self.Acc['Oracle'],self.Recall['Oracle'],self.Fscore['Oracle'] = train_validate_test(self.algpath,self.args, self.device, self.Weight['Oracle'], self.train_model, self.train_loader, self.test_loader, self.valid_loader, self.test_labels, self.num_classes) self.MSE['BMDM'] = np.sum(np.square(self.Weight['Oracle'] - self.Weight['BMDM']))/self.num_classes -- Gitee From 90c31d49289c4786ba574d239ae92e2c3f21e072 Mon Sep 17 00:00:00 2001 From: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> Date: Thu, 24 Jul 2025 13:54:53 +0000 Subject: [PATCH 4/5] add StreamLearn/tests/test_BBDM_demo.py. 
Signed-off-by: Xinyue <14948339+xinyue0331@user.noreply.gitee.com>
---
 StreamLearn/tests/test_BBDM_demo.py | 627 ++++++++++++++++++++++++++++
 1 file changed, 627 insertions(+)
 create mode 100644 StreamLearn/tests/test_BBDM_demo.py

diff --git a/StreamLearn/tests/test_BBDM_demo.py b/StreamLearn/tests/test_BBDM_demo.py
new file mode 100644
index 0000000..5b368e5
--- /dev/null
+++ b/StreamLearn/tests/test_BBDM_demo.py
@@ -0,0 +1,627 @@
+from __future__ import print_function
+import argparse
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
+import torch.utils.data as data
+import torchvision
+import torchvision.transforms as transforms
+from torchvision import datasets
+import numpy as np
+# from Gendataloader import GetLoader
+# from cifar10_for_labelshift import CIFAR10_SHIFT
+# from resnet import *
+import cvxpy as cp
+import sklearn.metrics
+from sklearn.metrics import precision_recall_fscore_support
+import os
+import copy
+import random
+import pandas as pd
+#from algorithms import *
+import matplotlib.pyplot as plt
+#from KMM import KMM
+from IPython.display import display
+from sklearn.metrics import confusion_matrix
+import time
+
+from StreamLearn.Algorithm.Algorithm_BBDM.Gendataloader import GetLoader
+from StreamLearn.Algorithm.Algorithm_BBDM.cifar10_for_labelshift import CIFAR10_SHIFT
+from StreamLearn.Algorithm.Algorithm_BBDM.resnet import *
+from StreamLearn.Algorithm.Algorithm_BBDM.algorithms import *
+from StreamLearn.Algorithm.Algorithm_BBDM.KMM import KMM
+from StreamLearn.Algorithm.Algorithm_BBDM.BBDM import *
+
+class Net(nn.Module):
+    def __init__(self, D_in, H, D_out):
+        super(Net, self).__init__()
+        self.D_in = D_in
+        self.H = H
+        self.D_out = D_out
+        self.model = torch.nn.Sequential(
+            torch.nn.Linear(self.D_in, self.H),
+            torch.nn.ReLU(),
+            torch.nn.Linear(self.H, self.D_out),
+        )
+
+    def forward(self, x):
+        x = x.view(-1, self.D_in)
+        x = self.model(x)
+        return x
+
+class ConvNet(nn.Module):
+    def __init__(self):
+        super(ConvNet, self).__init__()
+        self.conv1 = nn.Conv2d(3, 6, 5)
+        self.pool = nn.MaxPool2d(2, 2)
+        self.conv2 = nn.Conv2d(6, 16, 5)
+        self.fc1 = nn.Linear(16 * 5 * 5, 120)
+        self.fc2 = nn.Linear(120, 84)
+        self.fc3 = nn.Linear(84, 10)
+
+    def forward(self, x):
+        x = self.pool(F.relu(self.conv1(x)))
+        x = self.pool(F.relu(self.conv2(x)))
+        x = x.view(-1, 16 * 5 * 5)
+        x = F.relu(self.fc1(x))
+        x = F.relu(self.fc2(x))
+        x = self.fc3(x)
+        return x
+
+def train(args, model, device, train_loader, optimizer, epoch, weight=None):
+    model.train()
+    #outputsval = np.empty([0,10])
+    for batch_idx, (data, target) in enumerate(train_loader):
+        target = target.type(torch.LongTensor)
+        target = target.to(device)
+        data = data.to(device)
+        optimizer.zero_grad()
+        output = model(data)
+        if weight is None:
+            criterion = nn.CrossEntropyLoss()
+        else:
+            criterion = nn.CrossEntropyLoss(weight)
+        loss = criterion(output, target)
+        loss.backward()
+        optimizer.step()
+    return output
+
+def test(args, model, device, test_loader, weight=None):
+    model.eval()
+    test_loss = 0
+    correct = 0
+    outs = np.empty([0,1])
+    outputsval = np.empty([0,10])
+    prediction = np.empty([0,1])
+    with torch.no_grad():
+        for data, target in test_loader:
+            target = target.type(torch.LongTensor)
+            target = target.to(device)
+            data = data.to(device)
+            output = model(data)
+            if weight is None:
+                criterion = nn.CrossEntropyLoss(reduction='sum')
+            else:
+                criterion = nn.CrossEntropyLoss(weight, reduction='sum')
+            #target = target.type(torch.LongTensor)
+            loss = criterion(output, target)
+            outputs = F.softmax(output,dim=1)
+            test_loss += loss.item()  # sum up batch loss
+            out = outputs.max(1, keepdim=True)[0]
+            pred = output.max(1, keepdim=True)[1]  # get the index of the max log-probability
+            out = out.cpu().numpy()
+            correct += pred.eq(target.view_as(pred)).sum().item()
+            pred = pred.cpu().numpy()
+            prediction = np.concatenate((prediction, pred))
+            outs = np.concatenate((outs, out))
+            outputs = outputs.cpu().numpy()
+            outputsval = np.concatenate((outputsval, output))  # collect raw logits; calibrated later
+    test_loss /= len(test_loader.dataset)
+    return prediction, 100. * correct / len(test_loader.dataset), test_loss, outs, outputsval
+
+def count_classes(targets, num_classes):
+    """ Count number of samples per class in labeled dataset.
+
+    Args:
+        targets: torch.Tensor (num_data,) with ground truth labels in the dataset
+        num_classes: int representing number of classes in the dataset
+    Returns:
+        counts: torch.Tensor (num_classes, ) with number of samples per class
+    """
+    counts = torch.zeros(num_classes)
+    for i in range(num_classes):
+        counts[i] = (targets == i).sum().float()
+    return counts
+
+def learn_calibration2(model_outputs, targets, lr, iters, weights):
+    ''' Implements Bias-Corrected Temperature Scaling (BCTS) from https://arxiv.org/pdf/1901.06852.pdf.
+
+    Code modified from:
+    https://github.com/gpleiss/temperature_scaling/blob/master/temperature_scaling.py
+    Args:
+        model_outputs: torch.Tensor (num_data, num_classes) with outputs of the model before softmax (logits)
+        targets: torch.Tensor (num_data,) with ground truth labels corresponding to the predictions
+        lr: float representing learning rate
+        iters: int specifying number of iterations
+    Returns:
+        T: float with learned temperature
+        b: torch.Tensor (num_classes,) with learned biases
+    '''
+    T = torch.tensor([1.], requires_grad=True)
+    b = torch.ones(model_outputs.shape[1], requires_grad=True)
+
+    nll_criterion = nn.CrossEntropyLoss(weight=weights)
+
+    before_temperature_nll = nll_criterion(model_outputs, targets).item()
+
+    print('Before calibration - NLL: %.3f ' % (before_temperature_nll))
+
+    optimizer = optim.LBFGS([T, b], lr=lr, max_iter=iters)
+    def eval():
+        loss = nll_criterion(model_outputs/T + b, targets)
+        loss.backward()
+        return loss
+    optimizer.step(eval)
+
+    # Calculate NLL and ECE after temperature scaling
+    after_temperature_nll = nll_criterion(model_outputs/T + b, targets).item()
+    print('After calibration - NLL: %.3f ' % (after_temperature_nll))
+    return T.item(), b.detach()
+
+
+def compute_true_w(train_labels, test_labels, n_class, m_train, m_test):
+    # compute the true w
+    mu_y_train = np.zeros(n_class)
+    for i in range(n_class):
+        mu_y_train[i] = float(len(np.where(train_labels == i)[0]))/m_train
+    mu_y_test = np.zeros(n_class)
+    for i in range(n_class):
+        mu_y_test[i] = float(len(np.where(test_labels == i)[0]))/m_test
+    true_w = mu_y_test/mu_y_train
+    #print('True w is', true_w)
+    return true_w
+
+
+
+def adjust_predictions(predictions, trainset_priors, test_set_distribution=None):
+    """ Adjust classifier's predictions to prior shift,
+    knowing the training set distribution and a different test set distribution.
+    I.e. predictions are multiplied by the ratio of class priors.
+
+    Code modified from:
+    https://github.com/sulc/priors-example/blob/master/cifar-priors-example.ipynb
+    Args:
+        predictions: torch.Tensor (num_data, num_classes) with predictions
+        trainset_priors: torch.Tensor (num_classes,)
+        test_set_distribution: torch.Tensor (num_classes,); if None - use uniform distribution
+    Returns:
+        adjust_predictions: torch.Tensor (num_data, num_classes) with adjusted predictions
+    """
+    if test_set_distribution is None:
+        test_set_distribution = torch.ones(trainset_priors.shape)
+    adjusted_predictions = predictions * test_set_distribution / trainset_priors
+    adjusted_predictions = adjusted_predictions / torch.sum(adjusted_predictions, dim=1).unsqueeze(1)  # normalize to sum to 1
+    return adjusted_predictions
+
+def BBDM_gradient1(predictions, trainset_priors, test_set_distribution=None):
+    """ Compute the gradient of the log-likelihood term of the BBDM objective
+    with respect to the test set distribution, i.e. the mean of the predictions
+    adjusted by the ratio of class priors.
+
+    Code modified from:
+    https://github.com/sulc/priors-example/blob/master/cifar-priors-example.ipynb
+    Args:
+        predictions: torch.Tensor (num_data, num_classes) with predictions
+        trainset_priors: torch.Tensor (num_classes,)
+        test_set_distribution: torch.Tensor (num_classes,); if None - use uniform distribution
+    Returns:
+        gra1: torch.Tensor (num_classes,) with the gradient of the log-likelihood term
+    """
+    if test_set_distribution is None:
+        test_set_distribution = torch.ones(trainset_priors.shape)
+    adjusted_prediction = predictions / trainset_priors
+    adjusted_predictions = predictions * test_set_distribution / trainset_priors
+    adjusted_predictions = adjusted_prediction / torch.sum(adjusted_predictions, dim=1).unsqueeze(1)  # normalize to sum to 1
+    gra1 = torch.mean(adjusted_predictions, dim=0)
+    return gra1
+
+def estimate_priors_from_predictions(predictions):
+    """ Estimate class priors from predictions.
+
+    Code modified from:
+    https://github.com/sulc/priors-example/blob/master/cifar-priors-example.ipynb
+    Args:
+        predictions: torch.Tensor (num_data, num_classes) with predictions
+    Returns:
+        priors: torch.Tensor (num_classes) with estimated class priors
+    """
+
+    priors = torch.mean(predictions, dim=0)
+    return priors
+
+
+def next_step_projectedGA_with_prior(x, a, learning_rate, alpha, prior_relative_weight = 1.0):
+    """
+    Code modified from:
+    https://github.com/sulc/priors-example/blob/master/cifar-priors-example.ipynb
+    """
+    g = compute_gradient(x,a)
+    g_prior = log_dirichlet_gradient(x, alpha)
+    nx = x + learning_rate * (g + prior_relative_weight * g_prior)
+    nx = simplex_projection(nx)
+    nx = nx / nx.sum()
+    return nx
+
+def compute_gradient(x,a):
+    """
+    Compute gradient from Eq. 12 from:
+    http://openaccess.thecvf.com/content_ICCVW_2019/papers/TASK-CV/Sulc_Improving_CNN_Classifiers_by_Estimating_Test-Time_Priors_ICCVW_2019_paper.pdf
+
+    Code modified from:
+    https://github.com/sulc/priors-example/blob/master/cifar-priors-example.ipynb
+    """
+    d = torch.sum(a*x, dim=1)
+    g = torch.sum(a*(1/d.unsqueeze(1)), dim=0)
+    return g
+
+def log_dirichlet_gradient(x, alpha, numerical_min_prior=1e-8):
+    """
+    Compute gradient from Eq. 15 from:
+    http://openaccess.thecvf.com/content_ICCVW_2019/papers/TASK-CV/Sulc_Improving_CNN_Classifiers_by_Estimating_Test-Time_Priors_ICCVW_2019_paper.pdf
+
+    Code modified from:
+    https://github.com/sulc/priors-example/blob/master/cifar-priors-example.ipynb
+    """
+    g = (alpha - 1) / torch.max(input=x, other=torch.Tensor([numerical_min_prior]))
+    return g
+
+def simplex_projection(y):
+    """
+    Projection onto the probability simplex, based on https://eng.ucmerced.edu/people/wwang5/papers/SimplexProj.pdf
+
+    Code modified from:
+    https://github.com/sulc/priors-example/blob/master/cifar-priors-example.ipynb
+    """
+    u = -np.sort(-y.numpy())  # sort y in descending order
+    j = np.arange(1, len(y)+1)
+    phi_obj = u + 1/j * (1-np.cumsum(u))
+    positive = np.argwhere(phi_obj > 0)
+    if positive.size == 0: raise ValueError("Numerical issues - extremely large values after update.. DECREASE LEARNING RATE")
+    phi = positive.max() + 1
+    lam = 1/phi * (1-np.sum(u[:phi]))
+    x = np.maximum(y+lam,0)
+
+    return torch.Tensor(x)
+
+def BBDM_estimation(predictions, trainset_priors,K1,K2,K3, termination_difference=0.0001, lam=0.9 ,max_iter=3000, verbose=False):
+    """
+    Args:
+        predictions: torch.Tensor (num_data, num_classes) with predictions
+        trainset_priors: torch.Tensor (num_classes,) with the train set distribution
+        test_init_distribution: torch.Tensor (num_classes,) to initialize test set distribution.
+            If None, use trainset_priors.
+        termination_error: float defining the distance of posterior predictions for termination.
+    Returns:
+        new_predictions: torch.Tensor (num_data, num_classes) with adjusted predictions
+        new_testset_priors: torch.Tensor (num_classes,) with the estimated test set distribution
+    """
+    E = torch.ones(len(trainset_priors))
+    test_init_distribution = trainset_priors.detach().clone()
+    testset_priors = test_init_distribution / torch.sum(test_init_distribution)
+    theta = testset_priors
+    obj0 = (1-lam)*(np.transpose(theta.numpy())@K1 @ theta.numpy() - 2*(K2@ theta.numpy())+K3)-lam*( torch.mean(torch.log(torch.sum(predictions * theta / trainset_priors, dim=1))).numpy())
+
+    lr = 0.1
+    for i in range(int(max_iter)):
+        theta_int = theta
+        gra1 = BBDM_gradient1(predictions, trainset_priors, theta)
+        gra2 = torch.tensor(2*K1 @ theta.numpy() - 2*K2)
+        gra = (1-lam)*gra2 + lam*(E-gra1)
+        theta_mid = theta - lr*gra
+        theta1 = simplex_projection(theta_mid.float())
+        obj = (1-lam)*(np.transpose(theta1.numpy())@K1 @ theta1.numpy() - 2*(K2@ theta1.numpy())+K3)-lam*( torch.mean(torch.log(torch.sum(predictions * theta1 / trainset_priors, dim=1))).numpy())
+        # backtracking: shrink the step and recompute the candidate until the objective stops increasing
+        while obj - obj0 > 0 and lr > 1e-8:
+            lr = lr*0.8
+            theta_mid = theta - lr*gra
+            theta1 = simplex_projection(theta_mid.float())
+            obj = (1-lam)*(np.transpose(theta1.numpy())@K1 @ theta1.numpy() - 2*(K2@ theta1.numpy())+K3)-lam*( torch.mean(torch.log(torch.sum(predictions * theta1 / trainset_priors, dim=1))).numpy())
+        theta = theta1
+        obj0 = obj
+        difference = torch.sum((theta - theta_int)**2)
+        if difference < termination_difference*termination_difference:
+            #print("Finished. Difference", difference, "< termination value", termination_difference)
+            break
+    new_predictions = adjust_predictions(predictions, trainset_priors, theta)
+
+    return new_predictions, theta
+
+
+def label2matrix(label):
+    label = np.array(label)
+    uq_la = np.unique(label)
+    c = uq_la.shape[0]
+    n = label.shape[0]
+    label_mat = np.zeros((n,c))
+    for i in range(c):
+        index = (label == i)
+        label_mat[index,i] = 1
+    return label_mat
+
+
+def train_validate_test(args, device, w, train_model, train_loader, test_loader, validate_loader, test_labels, n_class):
+    w = torch.tensor(w)
+    #train_model.load_state_dict(init_state)
+    w = w.float()
+
+    # best_loss = 100000
+    # # model = train_model.to(device)#ConvNet().to(device)
+    # optimizer = optim.SGD(train_model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=5e-4)
+    # for epoch in range(1, args.epochs_training + 1):
+    #     train(args, train_model, device, train_loader, optimizer, epoch, weight=w)
+    #     # save checkpoint
+    #     if epoch > args.epochs_validation:
+    #         # validation
+    #         _, _, loss,_,_ = test(args, train_model, device, validate_loader, weight=w)
+    #         if loss < best_loss:
+    #             state = {
+    #                 'model': train_model.state_dict(),
+    #             }
+    #             torch.save(state, './checkpoint/ckpt1.pt')
+    #             best_loss = loss
+    checkpoint = torch.load('./checkpoint/ckpt1.pt')
+    train_model.load_state_dict(checkpoint['model'])
+    predictions, _, _,_,_ = test(args, train_model, device, test_loader)
+    precision, recall, f1, _ = precision_recall_fscore_support(test_labels, predictions, average='macro')
+    return precision,recall,f1
+
+def train_validate_test2(args, device, w, train_model, train_loader, test_loader, test_labels, n_class):
+    w = torch.tensor(w)
+    #train_model.load_state_dict(init_state)
+    w = w.float()
+
+    best_loss = 100000
+    # model = train_model.to(device)#ConvNet().to(device)
+    optimizer = optim.SGD(train_model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=5e-4)
+    for epoch in range(1, args.epochs_estimation + 2):
+        train(args, train_model, device, train_loader, optimizer, epoch, weight=w)
+        # save checkpoint
+        if epoch > args.epochs_validation:
+            # validation
+            _, _, loss,_,_ = test(args, train_model, device, test_loader, weight=w)
+            if loss < best_loss:
+                state = {
+                    'model': train_model.state_dict(),
+                }
+                torch.save(state, './checkpoint/ckpt1.pt')
+                best_loss = loss
+    checkpoint = torch.load('./checkpoint/ckpt1.pt')
+    train_model.load_state_dict(checkpoint['model'])
+    predictions, _, _,_,_ = test(args, train_model, device, test_loader)
+    precision, recall, f1, _ = precision_recall_fscore_support(test_labels, predictions, average='macro')
+    return precision,recall,f1
+
+if __name__ == "__main__":
+    # Training settings
+    parser = argparse.ArgumentParser(description='BBDM')
+    parser.add_argument('--data-name', type=str, default='cifar10', metavar='N',
+                        help='dataset name, mnist or cifar10 (default: mnist)')
+    parser.add_argument('--training-size', type=int, default=3000, metavar='N',
+                        help='sample size for both training (default: 30000)')
+    parser.add_argument('--testing-size', type=int, default=10000, metavar='N',
+                        help='sample size for testing (default: 30000)')
+    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
+                        help='input batch size for training (default: 64)')
+    parser.add_argument('--test-batch-size', type=int, default=64, metavar='N',
+                        help='input batch size for testing (default: 1000)')
+    parser.add_argument('--shift-type', type = int, default = 3, metavar = 'N',
+                        help = 'Label shift type (default: 2)')
+    parser.add_argument('--shift-para', type = float, default = 0.5, metavar = 'M',
+                        help = 'Label shift parameters (default: 0.2)')
+    parser.add_argument('--shift-para-aux', type = float, default = None, metavar = 'N',
+                        help = 'Label shift parameters (default: 0.2)')
+    parser.add_argument('--model', type = str, default='Resnet', metavar='N',
+                        help = 'model type to use (default MLP)')
+    parser.add_argument('--epochs-estimation', type=int, default=20, metavar='N',
+                        help='number of epochs in weight estimation (default: 40)')
+    parser.add_argument('--epochs-training', type=int, default=25, metavar='N',
+                        help='number of epochs in training (default: 40)')
+    parser.add_argument('--epochs-validation', type=int, default=20, metavar='N',
+                        help='number of epochs before run validation set, smaller than epochs training (default: 10)')
+    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
+                        help='learning rate (default: 0.01)')
+    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
+                        help='SGD momentum (default: 0.5)')
+    parser.add_argument('--no-cuda', action='store_true', default=False,
+                        help='disables CUDA training')
+    parser.add_argument('--log-interval', type=int, default=50, metavar='N',
+                        help='how many batches to wait before logging training status')
+    args = parser.parse_args()
+    use_cuda = not args.no_cuda and torch.cuda.is_available()
+    time_start = time.time()  # record start time
+
+    device = torch.device("cpu")
+    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
+    if (args.shift_type == 3) or (args.shift_type == 4):
+        alpha = np.ones(10) * args.shift_para
+        prob = np.random.dirichlet(alpha)
+        shift_para = prob
+        shift_para_aux = args.shift_para_aux
+
+    else:
+        shift_para = args.shift_para
+        shift_para_aux = args.shift_para_aux
+
+
+    if args.data_name == 'cifar10':
+        raw_data = CIFAR10_SHIFT('data/cifar10', args.training_size, args.testing_size, args.shift_type, shift_para, parameter_aux=shift_para_aux, target_label=2,
+                                 transform=transforms.Compose([
+                                     transforms.RandomCrop(32, padding=4),
+                                     transforms.RandomHorizontalFlip(),
+                                     transforms.ToTensor(),
+                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
+                                 ]), download=True)
+        D_in = 3072
+        if args.model == 'MLP':
+            base_model = Net(D_in, 512, 10)
+            train_model = base_model
+        else:
+            print('Using Resnet model for predictive tasks')
+            base_model = ResNet18()
+            train_model = base_model
+        init_state = train_model.state_dict()
+    else:
+        raise RuntimeError("Unsupported dataset")
+
+    # separate into training and testing
+
+    m = len(raw_data)
+    m_train = raw_data.get_trainsize()
+    m_test = raw_data.get_testsize()
+    m_valid = raw_data.get_validsize()
+
+
+    train_data = data.Subset(raw_data, range(m_train))
+    test_data = data.Subset(raw_data, range(m_train, m_train+m_test))
+    valid_data = data.Subset(raw_data, range(m_train+m_test,m_train+m_test+m_valid))
+
+    train_labels = raw_data.get_train_label()
+    test_labels = raw_data.get_test_label()
+    valid_labels = raw_data.get_valid_label()
+
+    train_loader = data.DataLoader(train_data,
+                                   batch_size=args.batch_size, shuffle=True, **kwargs)
+    train2_loader = data.DataLoader(train_data,
+                                    batch_size=args.batch_size, shuffle=False, **kwargs)
+    valid_loader = data.DataLoader(valid_data,
+                                   batch_size=args.batch_size, shuffle=False, **kwargs)
+    test_loader = data.DataLoader(test_data,
+                                  batch_size=args.batch_size, shuffle=False, **kwargs)
+
+    base_model = base_model.to(device)
+
+    optimizer = optim.SGD(base_model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=5e-4)
+    best_loss = 100000
+    # for epoch in range(1, args.epochs_training + 1):
+    #     train_out = train(args, base_model, device, train_loader, optimizer, epoch)
+    #     # save checkpoint
+    #     if epoch > args.epochs_validation:
+    #         # validation
+    #         _, _, loss,_,_ = test(args, base_model, device, valid_loader)
+    #         if loss < best_loss:
+    #             print('saving model')
+    #             state = {
+    #                 'model': base_model.state_dict(),
+    #             }
+    #             # if not os.path.isdir('checkpoint'):
+    #             #     os.mkdir('checkpoint')
+    #             torch.save(state, './checkpoint/ckpt.pt')
+    #             best_loss = loss
+    checkpoint = torch.load('./checkpoint/ckpt.pt')
+    base_model.load_state_dict(checkpoint['model'])
+    pre_train, acc_train, _, _, train_out = test(args, base_model, device, train2_loader)
+    train_tensor = torch.Tensor(train_out)
+    pre_valid, acc_valid, _, _, valid_out = test(args, base_model, device, valid_loader)
+    valid_tensor = torch.Tensor(valid_out)
+    pre_test, acc_test, _, _, test_out = test(args, base_model, device, test_loader)
+    test_tensor = torch.Tensor(test_out)
+    train_label = torch.Tensor(train_labels)
+    valid_label = torch.Tensor(valid_labels)
+    test_label = torch.Tensor(test_labels)
+
+
+    num_classes = test_tensor.shape[1]
+    truep = np.zeros(num_classes)
+
+    for i in range(num_classes):
+        truep[i] = float(len(np.where(train_labels == i)[0]))/m_train
+
+
+
+    # count samples per class
+    classes_train = count_classes(train_label, num_classes)
+    classes_val = count_classes(valid_label, num_classes)
+    classes_test = count_classes(test_label, num_classes)
+
+    # Classifier calibration
+    classes_train = classes_train / classes_train.sum()
+    classes_val = classes_val / classes_val.sum()
+    weights = classes_train / classes_val
+    weights = weights / weights.sum()  # class weights to compensate the difference in train and val. distributions
+
+    # apply calibration
+    T, b = learn_calibration2(valid_tensor.float(), valid_label.long(), lr=0.0001, iters=1000, weights=weights)
+    preds_train = torch.softmax(train_tensor/T + b, dim=1)
+    preds_val = torch.softmax(valid_tensor/T + b, dim=1)
+    preds_test = torch.softmax(test_tensor/T + b, dim=1)
+    train_soft = torch.softmax(train_tensor, dim=1).numpy()
+    classifier_priors = torch.mean(preds_train, dim=0)
+
+
+    KMM_train = raw_data.get_traindata()
+    KMM_train = KMM_train.reshape(-1,D_in).astype(np.float64)
+    KMM_labels = raw_data.get_train_label()
+    KMM_test = raw_data.get_testdata()
+    KMM_test = KMM_test.reshape(-1,D_in).astype(np.float64)
+    kmm = KMM(kernel_type='rbf', gamma=None)
+    D = label2matrix(KMM_labels)
+    D = D/truep
+    K1,K2,K3 = kmm.fit(KMM_train, KMM_test,D,0.001)
+
+
+    # Prepare structure for results
+    alg_list = ['NA', 'BMDM', 'Oracle']
+    MSE = dict()
+    Weight = dict()
+    Acc = dict()
+    Fscore = dict()
+    Recall = dict()
+    AAFscore = dict()
+    AARecall = dict()
+    AAWeight = dict()
+    AAMSE = dict()
+    AACC = dict()
+    # compute weight
+    Weight['Oracle'] = compute_true_w(train_labels, test_labels, num_classes, m_train, m_test)
+
+    # lam1 = [0.2]#[0, 0.2, 0.4, 0.6, 0.8, 1]
+    # Acc_BBDM = np.zeros(1)
+    # AABBDM_min = 0
+    # for i in range(len(lam1)):#torch.tensor(truep)
+    #     pred1_BBDM, est_BBDM = BBDM_estimation(preds_test, classifier_priors,K1,K2,K3, termination_difference=0.0001,lam = lam1[i],max_iter=3000)
+    #     Weight_BMDM = est_BBDM.numpy()/classifier_priors.numpy()
+    #     Acc_BBDM[i],_,_ = train_validate_test2(args, device, Weight_BMDM , train_model, train_loader, valid_loader, valid_labels, num_classes)
+    #     if Acc_BBDM[i] > AABBDM_min:
+    #         AABBDM_min = Acc_BBDM[i]
+    #         lam_opt = lam1[i]
+
+    # checkpoint = torch.load('./checkpoint/ckpt.pt')
+    # train_model.load_state_dict(checkpoint['model'])
+    # Acc['NA'],Recall['NA'],Fscore['NA'] = train_validate_test(args, device, np.ones(num_classes), train_model, train_loader, test_loader, valid_loader, test_labels, num_classes)
+    lam_opt = 0.2
+
+    pred1_BBDM, est_BBDM = BBDM_estimation(preds_test, classifier_priors,K1,K2,K3, termination_difference=0.0001,lam = lam_opt,max_iter=3000)
+    Weight['BMDM'] = est_BBDM.numpy()/classifier_priors.numpy()
+    checkpoint = torch.load('./checkpoint/ckpt.pt')
+    train_model.load_state_dict(checkpoint['model'])
+    Acc['BMDM'],Recall['BMDM'],Fscore['BMDM'] = train_validate_test(args, device, Weight['BMDM'], train_model, train_loader, test_loader, valid_loader, test_labels, num_classes)
+
+    # checkpoint = torch.load('./checkpoint/ckpt.pt')
+    # train_model.load_state_dict(checkpoint['model'])
+    # Acc['Oracle'],Recall['Oracle'],Fscore['Oracle'] = train_validate_test(args, device, Weight['Oracle'], train_model, train_loader, test_loader, valid_loader, test_labels, num_classes)
+
+    MSE['BMDM'] = np.sum(np.square(Weight['Oracle'] - Weight['BMDM']))/num_classes
+
+
+    AAMSE = MSE
+    AAFscore = Fscore
+    AARecall = Recall
+    AAWeight = Weight
+    AACC = Acc
+
+    print("########FINISHED!########")
+    print(AAMSE)
+    print(AAFscore)
+    print(AARecall)
+    print(AAWeight)
+    print(AACC)
\ No newline at end of file
-- 
Gitee

From 2fcfd531e32a628d8752c25c99a8a455fa7f4c65 Mon Sep 17 00:00:00 2001
From: Xinyue <14948339+xinyue0331@user.noreply.gitee.com>
Date: Thu, 24 Jul 2025 15:48:30 +0000
Subject: [PATCH 5/5] update StreamLearn/tests/test_BBDM_demo.py.

Signed-off-by: Xinyue <14948339+xinyue0331@user.noreply.gitee.com> --- StreamLearn/tests/test_BBDM_demo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/StreamLearn/tests/test_BBDM_demo.py b/StreamLearn/tests/test_BBDM_demo.py index 5b368e5..8552865 100644 --- a/StreamLearn/tests/test_BBDM_demo.py +++ b/StreamLearn/tests/test_BBDM_demo.py @@ -408,7 +408,7 @@ if __name__ == "__main__": help='dataset name, mnist or cifar10 (default: mnist)') parser.add_argument('--training-size', type=int, default=3000, metavar='N', help='sample size for both training (default: 30000)') - parser.add_argument('--testing-size', type=int, default=10000, metavar='N', + parser.add_argument('--testing-size', type=int, default=100, metavar='N', help='sample size for testing (default: 30000)') parser.add_argument('--batch-size', type=int, default=64, metavar='N', help='input batch size for training (default: 64)') -- Gitee
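
For reference, the core routine the new test exercises, BBDM_estimation, reduces when lam=1 (no kernel terms K1/K2/K3) to maximum-likelihood estimation of the test-time class priors by projected gradient ascent with a Euclidean projection onto the probability simplex. The following is a minimal, self-contained NumPy sketch of that special case, assuming calibrated softmax outputs; the names project_to_simplex and estimate_test_priors and the synthetic data are illustrative assumptions, not code from the repository.

import numpy as np

def project_to_simplex(y):
    # Euclidean projection onto the probability simplex
    # (same algorithm as simplex_projection in test_BBDM_demo.py,
    # per Wang & Carreira-Perpinan, SimplexProj.pdf)
    u = np.sort(y)[::-1]                      # sort descending
    css = np.cumsum(u)
    j = np.arange(1, y.size + 1)
    rho = j[u + (1.0 - css) / j > 0][-1]      # largest index with positive threshold
    lam = (1.0 - css[rho - 1]) / rho
    return np.maximum(y + lam, 0.0)

def estimate_test_priors(preds, train_priors, lr=0.1, iters=1000, tol=1e-10):
    # Projected gradient ascent on the test-set log-likelihood of the priors:
    # maximize mean_i log( sum_k preds[i,k] * theta[k] / train_priors[k] )
    ratio = preds / train_priors              # p(y=k|x_i) / p_train(k)
    theta = np.full(preds.shape[1], 1.0 / preds.shape[1])
    for _ in range(iters):
        denom = ratio @ theta                 # per-sample mixture likelihood
        grad = ratio.T @ (1.0 / denom) / preds.shape[0]
        new_theta = project_to_simplex(theta + lr * grad)
        if np.sum((new_theta - theta) ** 2) < tol:
            return new_theta
        theta = new_theta
    return theta

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    n_class, n = 10, 20000
    train_priors = np.full(n_class, 1.0 / n_class)
    true_priors = rng.dirichlet(np.ones(n_class))      # shifted test priors
    labels = rng.choice(n_class, size=n, p=true_priors)
    # crude stand-in for calibrated softmax outputs: 90% mass on the true class
    preds = np.full((n, n_class), 0.1 / (n_class - 1))
    preds[np.arange(n), labels] = 0.9
    print("true priors:", np.round(true_priors, 3))
    print("estimated  :", np.round(estimate_test_priors(preds, train_priors), 3))

On this synthetic data the estimate lands close to the sampled Dirichlet priors; the full BBDM objective additionally mixes in the kernel mean matching terms (K1, K2, K3 from KMM.fit) weighted by 1-lam, which is what the backtracking loop in BBDM_estimation descends.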