diff --git a/README.md b/README.md
index d5e72c8516674264da8e93e2aff6d182b89ea58a..6cea1217d647ac7ccf2ab915729269581e7cbeaf 100644
--- a/README.md
+++ b/README.md
@@ -75,7 +75,7 @@ GDRO: [np.float32(2.684156), np.float32(2.5641873), np.float32(2.473041), np.flo
 
 ### 3.2 Implemented Algorithms
 
-> GDRO, WGDRO, MERO, WMERO, DGNN, SWFD, SLB, ODS, BBDM, SAFC, CIL-MEMO
+> GDRO, WGDRO, MERO, WMERO, FTSL, FTFSL, DGNN, SWFD, SLB, ODS, BBDM, SAFC, CIL-MEMO
 
 This section briefly introduces the principles and functionality of the algorithms implemented in this project; see the references at the end for details.
 
@@ -85,6 +85,8 @@ GDRO: [np.float32(2.684156), np.float32(2.5641873), np.float32(2.473041), np.flo
 
 Traditional distributionally robust optimization (DRO) minimizes the maximum risk over a set of distributions. The MERO formulation of Agarwal and Zhang (2022) instead targets the excess risk, which suppresses the adverse effect of heterogeneous noise more effectively than classical DRO. We develop efficient stochastic approximation algorithms for MERO by casting it as a stochastic convex-concave optimization (SCCO) problem with biased gradients. Although the bias keeps existing theory from applying directly, we show that it is controllable and obtain a nearly optimal convergence rate. We also propose a new per-distribution stochastic optimization method that further improves convergence efficiency.
 
+Although the full-matrix adaptive gradient method (ADA-FULL) enjoys data-dependent theoretical guarantees, it maintains a $d \times d$ matrix of gradient outer products, which makes its time and space costs prohibitive. Feinberg et al. (2023) recently applied Frequent Directions-based matrix approximation to reduce the cost of ADA-FULL; however, their guarantee still depends on the dimension $d$, and the running time leaves room for improvement. To address both issues, we propose an efficient adaptive subgradient method that combines Frequent Directions with a new analysis in the primal-dual framework. It achieves dimension-free guarantees while matching the time and space complexity of existing methods. We further incorporate Fast Frequent Directions to reduce the running time without weakening the guarantees.
+
 #### Streaming Distributed Matrix and Element Estimation
 
 For matrix approximation and element estimation in large-scale distributed streaming settings, we provide the DS-FD operator and the distributed NDV operator. DS-FD maintains a matrix approximation over a dynamic sliding window and updates it in real time, so it tracks changing data distributions while remaining stable and accurate. The distributed NDV operator supports parallel processing of large-scale data within a distributed computing framework, greatly improving the efficiency and scalability of stream processing. Both operators keep resource consumption low in constrained environments and adapt to distribution shift, delivering reliable performance on complex and dynamic data streams.
@@ -106,7 +108,8 @@ GDRO: [np.float32(2.684156), np.float32(2.5641873), np.float32(2.473041), np.flo
 
 1. [Efficient Stochastic Approximation of Minimax Excess Risk Optimization](https://arxiv.org/abs/2306.00026)
 2. [Stochastic Approximation Approaches to Group Distributionally Robust Optimization and Beyond](https://arxiv.org/abs/2302.09267)
-3. [Incremental Learning for Simultaneous Augmentation of Feature and Class](https://ieeexplore.ieee.org/document/10227560)
-4. [A Model or 603 Exemplars: Towards Memory-Efficient Class-Incremental Learning](https://arxiv.org/abs/2205.13218)
-5. [Learnability with Time-Sharing Computational Resource Concerns](https://academic.oup.com/nsr/article/11/10/nwae204/7693120?login=true)
-6. [Matrix Sketching in Bandits: Current Pitfalls and New Framework](https://arxiv.org/pdf/2410.10258)
\ No newline at end of file
+3. [Dimension-Free Adaptive Subgradient Methods with Frequent Directions](https://icml.cc/virtual/2025/43453)
+4. [Incremental Learning for Simultaneous Augmentation of Feature and Class](https://ieeexplore.ieee.org/document/10227560)
+5. [A Model or 603 Exemplars: Towards Memory-Efficient Class-Incremental Learning](https://arxiv.org/abs/2205.13218)
+6. [Learnability with Time-Sharing Computational Resource Concerns](https://academic.oup.com/nsr/article/11/10/nwae204/7693120?login=true)
+7. [Matrix Sketching in Bandits: Current Pitfalls and New Framework](https://arxiv.org/pdf/2410.10258)
\ No newline at end of file
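The new README paragraph above leans on the Frequent Directions sketch. For readers unfamiliar with it, here is a minimal NumPy sketch of the classical Frequent Directions update (insert a row, shrink all singular values by the smallest one). It is an illustration of the underlying technique only, not the repository's `Opt_FD` implementation; the function and variable names are ours.

```python
import numpy as np

def frequent_directions(rows, sketch_rows):
    """Maintain a sketch B (sketch_rows x d) with B.T @ B approximating A.T @ A."""
    d = rows.shape[1]
    B = np.zeros((sketch_rows, d))
    for row in rows:
        # Insert the new row into the zeroed last slot of the sketch.
        B[-1] = row
        # Shrink: subtract the smallest squared singular value from all of them,
        # which zeroes the last row again for the next insertion.
        _, s, vt = np.linalg.svd(B, full_matrices=False)
        shrunk = np.sqrt(np.maximum(s**2 - s[-1]**2, 0.0))
        B = shrunk[:, None] * vt
    return B

rng = np.random.default_rng(0)
A = rng.standard_normal((500, 64))
B = frequent_directions(A, sketch_rows=16)
# The FD guarantee bounds this spectral error by ||A||_F**2 / sketch_rows.
err = np.linalg.norm(A.T @ A - B.T @ B, 2)
print(err, np.linalg.norm(A, 'fro')**2 / 16)
```

The point of the sketch is that `B` has only `sketch_rows` rows, yet `B.T @ B` tracks `A.T @ A` up to an additive error of roughly `||A||_F**2 / sketch_rows`, which is what lets the optimizers below avoid a full $d \times d$ matrix.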
diff --git a/StreamLearn/Algorithm/FTSL/FTFSL.py b/StreamLearn/Algorithm/FTSL/FTFSL.py
new file mode 100644
index 0000000000000000000000000000000000000000..97165f896d2754edc080beeb40e97adcf3500efa
--- /dev/null
+++ b/StreamLearn/Algorithm/FTSL/FTFSL.py
@@ -0,0 +1,62 @@
+import torch
+from torch import nn
+from .Opt_FD import Opt_FTFSL
+from StreamLearn.Base.ResNet import ResNet18
+from StreamLearn.Base.SemiEstimator import StreamAlgorithm
+
+
+class FTFSL(StreamAlgorithm):
+    def __init__(self, args):
+        self.args = args
+        self.name = 'FTFSL'
+        self.T = args.T
+        self.lr = args.lr
+        self.tau_ratio = args.tau_ratio
+        self.epsilon = args.epsilon
+        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        self.w = ResNet18().to(self.device)
+        if self.device.type == 'cuda':
+            self.w = torch.nn.DataParallel(self.w)
+        self.criterion = nn.CrossEntropyLoss()
+        self.optimizer = Opt_FTFSL(self.w.parameters(), lr=self.lr, epsilon=self.epsilon,
+                                   tau_ratio=self.tau_ratio, device=self.device)
+
+    def stream_fit(self, trainloader):
+        # One pass over the training stream; reports average loss and accuracy.
+        self.w.train()
+        train_loss = 0
+        correct = 0
+        total = 0
+        for batch_idx, (inputs, targets) in enumerate(trainloader):
+            inputs, targets = inputs.to(self.device), targets.to(self.device)
+            self.optimizer.zero_grad()
+            outputs = self.w(inputs)
+            loss = self.criterion(outputs, targets)
+            loss.backward()
+            self.optimizer.step()
+            _, predicted = outputs.max(1)
+            correct += predicted.eq(targets).sum().item()
+            train_loss += loss.item()
+            total += targets.size(0)
+        print('FTFSL Loss:', train_loss / (batch_idx + 1), 'Accuracy:', 100. * correct / total)
+
+    def stream_evaluate(self, dataloader):
+        self.w.eval()
+        test_loss = 0
+        with torch.no_grad():
+            for batch_idx, (inputs, targets) in enumerate(dataloader):
+                inputs, targets = inputs.to(self.device), targets.to(self.device)
+                outputs = self.w(inputs)
+                loss = self.criterion(outputs, targets)
+                test_loss += loss.item()
+        return test_loss / (batch_idx + 1)
+
+    def fit(self, stream_dataset):
+        # Evaluate-then-train each round so metrics[t] is the test loss before round t's update.
+        self.metrics = []
+        stream_dataset.data_loader()
+        for _ in range(self.args.T):
+            self.metrics.append(self.stream_evaluate(stream_dataset.testloader))
+            self.stream_fit(stream_dataset.trainloader)
+
+    def test(self):
+        print('FTFSL:', self.metrics)
\ No newline at end of file
diff --git a/StreamLearn/Algorithm/FTSL/FTSL.py b/StreamLearn/Algorithm/FTSL/FTSL.py
new file mode 100644
index 0000000000000000000000000000000000000000..98090266f6ffedec5b27087e36069443575ec24d
--- /dev/null
+++ b/StreamLearn/Algorithm/FTSL/FTSL.py
@@ -0,0 +1,62 @@
+import torch
+from torch import nn
+from .Opt_FD import Opt_FTSL
+from StreamLearn.Base.ResNet import ResNet18
+from StreamLearn.Base.SemiEstimator import StreamAlgorithm
+
+
+class FTSL(StreamAlgorithm):
+    def __init__(self, args):
+        self.args = args
+        self.name = 'FTSL'
+        self.T = args.T
+        self.lr = args.lr
+        self.tau_ratio = args.tau_ratio
+        self.epsilon = args.epsilon
+
+        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        self.w = ResNet18().to(self.device)
+        if self.device.type == 'cuda':
+            self.w = torch.nn.DataParallel(self.w)
+        self.criterion = nn.CrossEntropyLoss()
+        self.optimizer = Opt_FTSL(self.w.parameters(), lr=self.lr, epsilon=self.epsilon,
+                                  tau_ratio=self.tau_ratio, device=self.device)
+
+    def stream_fit(self, trainloader):
+        # One pass over the training stream; reports average loss and accuracy.
+        self.w.train()
+        train_loss = 0
+        correct = 0
+        total = 0
+        for batch_idx, (inputs, targets) in enumerate(trainloader):
+            inputs, targets = inputs.to(self.device), targets.to(self.device)
+            self.optimizer.zero_grad()
+            outputs = self.w(inputs)
+            loss = self.criterion(outputs, targets)
+            loss.backward()
+            self.optimizer.step()
+            _, predicted = outputs.max(1)
+            correct += predicted.eq(targets).sum().item()
+            train_loss += loss.item()
+            total += targets.size(0)
+        print('FTSL Loss:', train_loss / (batch_idx + 1), 'Accuracy:', 100. * correct / total)
+
+    def stream_evaluate(self, dataloader):
+        self.w.eval()
+        test_loss = 0
+        with torch.no_grad():
+            for batch_idx, (inputs, targets) in enumerate(dataloader):
+                inputs, targets = inputs.to(self.device), targets.to(self.device)
+                outputs = self.w(inputs)
+                loss = self.criterion(outputs, targets)
+                test_loss += loss.item()
+        return test_loss / (batch_idx + 1)
+
+    def fit(self, stream_dataset):
+        # Evaluate-then-train each round so metrics[t] is the test loss before round t's update.
+        self.metrics = []
+        stream_dataset.data_loader()
+        for _ in range(self.args.T):
+            self.metrics.append(self.stream_evaluate(stream_dataset.testloader))
+            self.stream_fit(stream_dataset.trainloader)
+
+    def test(self):
+        print('FTSL:', self.metrics)
\ No newline at end of file
diff --git a/StreamLearn/Algorithm/FTSL/Opt_FD.py b/StreamLearn/Algorithm/FTSL/Opt_FD.py
new file mode 100644
index 0000000000000000000000000000000000000000..16bfceffa0cfe25df0d691330e76021923d70ec6
--- /dev/null
+++ b/StreamLearn/Algorithm/FTSL/Opt_FD.py
@@ -0,0 +1,103 @@
+import torch
+from torch.optim import Optimizer
+
+
+def safe_invert(x, eps=1e-4):
+    # Elementwise 1/x that maps entries below eps to 0 instead of blowing up.
+    return torch.where(x <= eps, torch.zeros_like(x), 1.0 / x)
+
+
+class Opt_FTSL(Optimizer):
+    def __init__(self, params, lr=0.1, epsilon=0.1, tau_ratio=0.1, device='cuda'):
+        self.device = device
+        self.epsilon = epsilon
+        self.tau_ratio = tau_ratio
+        defaults = dict(lr=lr)
+        super(Opt_FTSL, self).__init__(params, defaults)
+
+    def step(self):
+        for group in self.param_groups:
+            for p in group['params']:
+                if p.grad is None:
+                    continue
+                grad = p.grad.data
+                state = self.state[p]
+                if 'step' not in state:
+                    state['step'] = 0
+                    state['init_parameter'] = p.data.clone()
+                    state['alpha'] = torch.tensor(self.epsilon, device=self.device, dtype=torch.float)
+                    tau = min(max(5, grad.numel() * self.tau_ratio), 100)
+                    state['past_gradient'] = torch.zeros(grad.numel(), device=self.device)
+                    state['B'] = torch.zeros((int(tau), grad.numel()), device=self.device)
+
+                grad_input = grad.ravel().clone()
+                state['past_gradient'] += grad_input
+                state['step'] += 1
+                # Frequent Directions: insert the new gradient into the zeroed last row,
+                # then shrink all singular values by the smallest one.
+                B = state['B']
+                B[-1] = grad_input
+
+                _, s, vt = torch.linalg.svd(B, full_matrices=False)
+                s = torch.where(s < 1e-4, torch.tensor(0.0, device=s.device, dtype=s.dtype), s)
+                rho = s[-1]
+                value = (s - rho) * (s + rho)
+                s = torch.sqrt(torch.clamp(value, min=0))
+                # Keep the shrunk sketch (its last row is zero again for the next insertion)
+                # and accumulate the discarded mass rho^2 into the regularizer alpha.
+                state['B'] = s.view(-1, 1) * vt
+                state['alpha'] = state['alpha'] + rho ** 2
+
+                # Apply (delta * I + V diag(sigma) V^T)^{-1} to the accumulated gradient
+                # without ever forming the d x d preconditioner.
+                g = state['past_gradient'].clone()
+                V = vt.t()
+                delta = torch.sqrt(state['alpha']).clone()
+                sigma = s.clone()
+                temp = V @ ((safe_invert(delta + sigma) * sigma) * (V.t() @ g))
+                update = (safe_invert(delta) * (g - temp)).view(p.data.size())
+                p.data = state['init_parameter'] - group['lr'] * update
+
+
+class Opt_FTFSL(Optimizer):
+    def __init__(self, params, lr=0.1, epsilon=0.01, tau_ratio=0.1, device='cuda'):
+        self.device = device
+        self.epsilon = epsilon
+        self.tau_ratio = tau_ratio
+        defaults = dict(lr=lr)
+        super(Opt_FTFSL, self).__init__(params, defaults)
+
+    def step(self):
+        for group in self.param_groups:
+            for p in group['params']:
+                if p.grad is None:
+                    continue
+
+                grad = p.grad.data
+                state = self.state[p]
+
+                if 'step' not in state:
+                    state['step'] = 0
+                    state['r'] = 0
+                    state['past_gradient'] = torch.zeros(grad.numel(), device=self.device)
+                    state['init_parameter'] = p.data.clone()
+                    state['alpha'] = torch.tensor(self.epsilon, device=self.device, dtype=torch.float)
+                    state['tau'] = int(min(max(5, grad.numel() * self.tau_ratio), 100))
+                    state['V'] = torch.zeros((grad.numel(), 2 * state['tau']), device=self.device)
+                    state['M'] = torch.zeros((2 * state['tau'], 2 * state['tau']), device=self.device)
+
+                state['step'] += 1
+                state['r'] += 1
+                grad_input = grad.ravel().clone()
+                state['past_gradient'] += grad_input
+
+                temp = state['V'] @ (state['V'].t() @ grad_input)
+                state['V'][:, state['r'] - 1] = temp
+                V_t = state['V'].clone()
+                # Accumulate the projected gradient's outer product into the small 2*tau x 2*tau matrix.
+                proj = V_t.t() @ grad_input
+                state['M'] = state['M'] + torch.outer(proj, proj)
+                U, s, Ut = torch.linalg.svd(state['M'], full_matrices=False)
+                delta = torch.sqrt(state['alpha']).clone()
+                sigma = torch.sqrt(s)
+                p_g = state['past_gradient'].ravel().clone()
+                temp = (1 / delta) * (p_g - V_t @ U @ ((safe_invert(delta + sigma) * sigma) * (Ut @ V_t.t() @ p_g)))
+                p.data = state['init_parameter'] - group['lr'] * temp.view(p.data.size())
+
+                # Fast FD: once the buffer is full, rotate V by U, shrink the spectrum,
+                # and free the tail columns for future insertions.
+                if state['r'] == 2 * state['tau']:
+                    sigma = s[state['tau'] - 1]
+                    state['M'] = torch.diag(torch.clamp(s - sigma, min=0))
+                    state['V'] = state['V'] @ U
+                    state['r'] = state['tau'] - 1
+                    state['V'][:, -state['tau'] - 1:] = 0
+                    state['alpha'] += sigma
\ No newline at end of file
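The preconditioned step in both optimizers above applies the inverse of a "diagonal plus low-rank" matrix to the accumulated gradient without materializing the $d \times d$ preconditioner, relying on the identity $(\delta I + V \mathrm{diag}(\sigma) V^\top)^{-1} g = \delta^{-1}\big(g - V\,\mathrm{diag}(\tfrac{\sigma}{\delta+\sigma})\,V^\top g\big)$ for $V$ with orthonormal columns. The snippet below is a small self-contained numerical check of that identity; the sizes and names are made up for illustration and are not part of the repository.

```python
import torch

torch.manual_seed(0)
d, tau = 50, 8
delta = 0.3
# Random orthonormal columns V (d x tau) and nonnegative sigma, as in the sketch update.
V, _ = torch.linalg.qr(torch.randn(d, tau))
sigma = torch.rand(tau)
g = torch.randn(d)

# Direct solve against the full d x d preconditioner.
H = delta * torch.eye(d) + V @ torch.diag(sigma) @ V.t()
direct = torch.linalg.solve(H, g)

# Matrix-free form used in the step() methods above (O(d * tau) per application).
matrix_free = (g - V @ ((sigma / (delta + sigma)) * (V.t() @ g))) / delta

print(torch.allclose(direct, matrix_free, atol=1e-4))  # True
```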
diff --git a/StreamLearn/Config/FTFSL.py b/StreamLearn/Config/FTFSL.py
new file mode 100644
index 0000000000000000000000000000000000000000..a25a7c08ead2ec092a617ff686073bcb2eb699f0
--- /dev/null
+++ b/StreamLearn/Config/FTFSL.py
@@ -0,0 +1,11 @@
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--dataset_mode', type=str, default='balance', help='[balance, imbalance]')
+parser.add_argument('--lr', type=float, default=1)
+parser.add_argument('--tau_ratio', type=float, default=0.1)
+parser.add_argument('--epsilon', type=float, default=1)
+parser.add_argument('--batch', type=int, default=128)
+parser.add_argument('--T', type=int, default=200)
+parser.add_argument('--run_time', type=int, default=10)
+args, unknown = parser.parse_known_args()
diff --git a/StreamLearn/Config/FTSL.py b/StreamLearn/Config/FTSL.py
new file mode 100644
index 0000000000000000000000000000000000000000..a25a7c08ead2ec092a617ff686073bcb2eb699f0
--- /dev/null
+++ b/StreamLearn/Config/FTSL.py
@@ -0,0 +1,11 @@
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--dataset_mode', type=str, default='balance', help='[balance, imbalance]')
+parser.add_argument('--lr', type=float, default=1)
+parser.add_argument('--tau_ratio', type=float, default=0.1)
+parser.add_argument('--epsilon', type=float, default=1)
+parser.add_argument('--batch', type=int, default=128)
+parser.add_argument('--T', type=int, default=200)
+parser.add_argument('--run_time', type=int, default=10)
+args, unknown = parser.parse_known_args()
diff --git a/StreamLearn/Config/README.md b/StreamLearn/Config/README.md
index 50eea7bc652cc72499fb31191d3512111a3ee821..53f1b725f509848ad569dea4eb7c1b6c692e433d 100644
--- a/StreamLearn/Config/README.md
+++ b/StreamLearn/Config/README.md
@@ -52,6 +52,26 @@
 - T: total iterations
 - run_time: number of runs
 
+## FTSL
+
+- dataset_mode: balance or imbalance
+- batch: batch size
+- lr: learning rate for FTSL
+- tau_ratio: sketch size as a fraction of each parameter tensor's size (the sketch size is clipped to [5, 100])
+- epsilon: positive constant that initializes the preconditioner regularizer
+- T: total iterations
+- run_time: number of runs
+
+## FTFSL
+
+- dataset_mode: balance or imbalance
+- batch: batch size
+- lr: learning rate for FTFSL
+- tau_ratio: sketch size as a fraction of each parameter tensor's size (the sketch size is clipped to [5, 100])
+- epsilon: positive constant that initializes the preconditioner regularizer
+- T: total iterations
+- run_time: number of runs
+
 ## SWFD
 
 - dataset_mode: balance or imbalance
diff --git a/StreamLearn/Config/Simulator.py b/StreamLearn/Config/Simulator.py
index 599865f8294a46ba9a0baf4f305f7df43e7a8740..2c1f1ba35d30ad66a0a58981717dd1fe63e8639b 100644
--- a/StreamLearn/Config/Simulator.py
+++ b/StreamLearn/Config/Simulator.py
@@ -1,6 +1,8 @@
 # config for testing the stream simulator
 from StreamLearn.Config.GDRO import args as gdro_args
 from StreamLearn.Config.MERO import args as mero_args
+from StreamLearn.Config.FTSL import args as ftsl_args
+from StreamLearn.Config.FTFSL import args as ftfsl_args
 from StreamLearn.Config.DGNN import args as dgnn_args
 from StreamLearn.Config.SWFD import args as swfd_args
 from StreamLearn.Config.SWFD import model_configs as swfd_model_configs
@@ -18,6 +20,8 @@ dataset = {
     'wgdro': 'StreamLearn.Dataset.CIFAR10_Dataset.CIFAR10_Dataset',  # 1-2
     'mero': 'StreamLearn.Dataset.CIFAR10_Dataset.CIFAR10_Dataset',  # 1-3
     'wmero': 'StreamLearn.Dataset.CIFAR10_Dataset.CIFAR10_Dataset',  # 1-3
+    'ftsl': 'StreamLearn.Dataset.CIFAR10_Dataset.CIFAR10_Dataset',  # 1-2
+    'ftfsl': 'StreamLearn.Dataset.CIFAR10_Dataset.CIFAR10_Dataset',  # 1-2
     'dgnn': 'StreamLearn.Dataset.DTDGsDataset.DTDGsDataset',  # FIXME 2-1
     'swfd': 'StreamLearn.Dataset.FDDataset.FDDataset',  # 2-2
     'slb': 'StreamLearn.Dataset.CIFAR10_Dataset.CIFAR10_Dataset',  # 2-3
@@ -31,6 +35,8 @@ dataset_config = {
     'wgdro': gdro_args,
     'mero': mero_args,
     'wmero': mero_args,
+    'ftsl': ftsl_args,
+    'ftfsl': ftfsl_args,
     'dgnn': dgnn_args.data,
     'swfd': swfd_args,
     'slb': swfd_args,
@@ -44,6 +50,8 @@ model = {
     'wgdro': 'StreamLearn.Algorithm.GDRO.WGDRO.WGDRO',
     'mero': 'StreamLearn.Algorithm.MERO.MERO.MERO',
     'wmero': 'StreamLearn.Algorithm.MERO.WMERO.WMERO',
+    'ftsl': 'StreamLearn.Algorithm.FTSL.FTSL.FTSL',
+    'ftfsl': 'StreamLearn.Algorithm.FTSL.FTFSL.FTFSL',
     'dgnn': 'StreamLearn.Algorithm.DecoupledDGNN.DGNN.DGNN',
     'swfd': 'StreamLearn.Algorithm.SlidingWindowFD.DSFD.SeqDSFD',
     'slb': 'StreamLearn.Algorithm.SlidingWindowFD.SketchedBandit.SketchedBandit',
@@ -57,6 +65,8 @@ model_config = {
     'wgdro': gdro_args,
     'mero': mero_args,
     'wmero': mero_args,
+    'ftsl': ftsl_args,
+    'ftfsl': ftfsl_args,
     'dgnn': dgnn_args,
     'swfd': swfd_model_configs,
     'slb': swfd_args,
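The Simulator registries above map algorithm keys to dotted import paths and config objects. Assuming the simulator resolves those paths in the usual way (this is an assumption about its loader, not code taken from the repository), the pattern looks like:

```python
import importlib

def load_class(dotted_path):
    # 'StreamLearn.Algorithm.FTSL.FTSL.FTSL' -> module path + class name.
    module_path, class_name = dotted_path.rsplit('.', 1)
    return getattr(importlib.import_module(module_path), class_name)

# Hypothetical usage mirroring the registry dictionaries above.
# alg_cls = load_class(model['ftsl'])
# alg = alg_cls(model_config['ftsl'])
```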
diff --git a/StreamLearn/Dataset/CIFAR10_Dataset.py b/StreamLearn/Dataset/CIFAR10_Dataset.py
index c73b9ee7295366f086fb08a1753c00e662c61498..c7cd403389338a5e954d08a4d8eaf99f31bd5e0a 100644
--- a/StreamLearn/Dataset/CIFAR10_Dataset.py
+++ b/StreamLearn/Dataset/CIFAR10_Dataset.py
@@ -4,7 +4,7 @@
 from PIL import Image
 import pandas as pd
 from torchvision.transforms import transforms
 from StreamLearn.Dataset.StreamDataset import StreamDataset
-
+import torchvision, torch
 
 class CIFAR10_Dataset(StreamDataset):
     def __init__(self, args):
@@ -135,4 +135,26 @@
             data[key]['images'] = np.array(data[key]['images'])
             data[key]['labels'] = np.array(data[key]['labels'])
 
-        return data
\ No newline at end of file
+        return data
+
+    def data_loader(self):
+        transform_train = transforms.Compose([
+            transforms.Resize((32, 32)),
+            transforms.ToTensor(),
+            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
+        ])
+
+        transform_test = transforms.Compose([
+            transforms.Resize((32, 32)),
+            transforms.ToTensor(),
+            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
+        ])
+        self.trainset = torchvision.datasets.CIFAR10(
+            root='./dataset', train=True, download=True, transform=transform_train)
+        self.trainloader = torch.utils.data.DataLoader(
+            self.trainset, batch_size=128, shuffle=True, num_workers=2)
+
+        self.testset = torchvision.datasets.CIFAR10(
+            root='./dataset', train=False, download=True, transform=transform_test)
+        self.testloader = torch.utils.data.DataLoader(
+            self.testset, batch_size=100, shuffle=False, num_workers=2)
\ No newline at end of file
diff --git a/StreamLearn/tests/test_FTSL.py b/StreamLearn/tests/test_FTSL.py
new file mode 100644
index 0000000000000000000000000000000000000000..468d214946352e48e08aa88e41c80fceaa5a40c9
--- /dev/null
+++ b/StreamLearn/tests/test_FTSL.py
@@ -0,0 +1,55 @@
+import numpy as np
+import argparse
+import random
+from StreamLearn.Algorithm.FTSL.FTSL import FTSL
+from StreamLearn.Algorithm.FTSL.FTFSL import FTFSL
+import torch
+from StreamLearn.Dataset.CIFAR10_Dataset import CIFAR10_Dataset
+
+
+def set_seed(seed):
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+
+
+def train_and_evaluate_stream_FTSL(args):
+    metrics = []
+    for _ in range(args.run_time):
+        dataset = CIFAR10_Dataset(args)
+        dataset.data_loader()
+        alg = FTSL(args)
+        loss_t = []
+        for __ in range(args.T):
+            loss_t.append(alg.stream_evaluate(dataset.testloader))
+            alg.stream_fit(dataset.trainloader)
+        metrics.append(loss_t)
+
+
+def train_and_evaluate_stream_FTFSL(args):
+    metrics = []
+    for _ in range(args.run_time):
+        dataset = CIFAR10_Dataset(args)
+        dataset.data_loader()
+        alg = FTFSL(args)
+        loss_t = []
+        for __ in range(args.T):
+            loss_t.append(alg.stream_evaluate(dataset.testloader))
+            alg.stream_fit(dataset.trainloader)
+        metrics.append(loss_t)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--dataset_mode', type=str, default='balance', help='[balance, imbalance]')
+    parser.add_argument('--lr', type=float, default=0.1)
+    parser.add_argument('--tau_ratio', type=float, default=0.1)
+    parser.add_argument('--epsilon', type=float, default=0.01)
+    parser.add_argument('--batch', type=int, default=256)
+    parser.add_argument('--T', type=int, default=10)
+    parser.add_argument('--run_time', type=int, default=2)
+    args = parser.parse_args()
+    set_seed(2023)
+
+    if args.dataset_mode == 'balance':
+        train_and_evaluate_stream_FTSL(args)
+
+
+if __name__ == '__main__':
+    main()
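One observation on the test: `train_and_evaluate_stream_FTFSL` is defined but `main()` only exercises the FTSL path. If FTFSL should be covered as well, a minimal extension of `main()` (a suggestion, not part of the diff) would be:

```python
    if args.dataset_mode == 'balance':
        train_and_evaluate_stream_FTSL(args)
        train_and_evaluate_stream_FTFSL(args)
```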