From b4174a036030b1c7a39536c8b4776ceaf15d0fc1 Mon Sep 17 00:00:00 2001 From: Howemin Date: Fri, 25 Jul 2025 15:25:03 +0800 Subject: [PATCH 1/3] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E8=AF=BE=E9=A2=98?= =?UTF-8?q?=E4=B8=80=20code+demo+Readme?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- StreamLearn/Algorithm/FTSL/FTFSL.py | 17 +++- StreamLearn/Algorithm/FTSL/FTSL.py | 17 +++- StreamLearn/Algorithm/GDRO/GDRO.py | 5 +- StreamLearn/Algorithm/GDRO/WGDRO.py | 7 +- StreamLearn/Algorithm/MERO/MERO.py | 7 +- StreamLearn/Algorithm/MERO/WMERO.py | 9 +- StreamLearn/Base/ResNet.py | 132 -------------------------- StreamLearn/Config/FTFSL.py | 3 +- StreamLearn/Config/FTSL.py | 3 +- StreamLearn/Config/GDRO.py | 1 + StreamLearn/Config/MERO.py | 1 + StreamLearn/Config/PAA.py | 20 ++++ StreamLearn/legacy/README.md | 142 ++++++++++++++++++++++++++-- StreamLearn/tests/Demo_Task_1.py | 128 +++++++++++++++++++++++++ StreamLearn/tests/test_FTSL.py | 1 + StreamLearn/tests/test_GDRO.py | 1 + StreamLearn/tests/test_MERO.py | 1 + 17 files changed, 332 insertions(+), 163 deletions(-) delete mode 100644 StreamLearn/Base/ResNet.py create mode 100644 StreamLearn/Config/PAA.py create mode 100644 StreamLearn/tests/Demo_Task_1.py diff --git a/StreamLearn/Algorithm/FTSL/FTFSL.py b/StreamLearn/Algorithm/FTSL/FTFSL.py index 97165f8..de9d8d4 100644 --- a/StreamLearn/Algorithm/FTSL/FTFSL.py +++ b/StreamLearn/Algorithm/FTSL/FTFSL.py @@ -2,7 +2,7 @@ import numpy as np import torch from torch import nn from .Opt_FD import Opt_FTFSL -from StreamLearn.Base.ResNet import ResNet18 +from StreamLearn.Network.ResNetTTA import ResNet18 from StreamLearn.Base.SemiEstimator import StreamAlgorithm @@ -12,10 +12,11 @@ class FTFSL(StreamAlgorithm): self.name = 'FTFSL' self.T = args.T self.lr = args.lr + self.num_class = args.num_class self.tau_ratio = args.tau_ratio self.epsilon = args.epsilon self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.w = ResNet18() + self.w = ResNet18(self.num_class) self.w = self.w.to(self.device) if self.device == 'cuda': self.w = torch.nn.DataParallel(self.w) @@ -23,12 +24,15 @@ class FTFSL(StreamAlgorithm): self.criterion = nn.CrossEntropyLoss() self.optimizer = Opt_FTFSL(self.w.parameters(), lr=self.lr, epsilon=self.epsilon, tau_ratio=self.tau_ratio) - def stream_fit(self, trainloader): + def stream_fit(self, trainloader, mode='demo'): self.w.train() train_loss=0 correct=0 total=0 + max_batches = 2 if mode == 'demo' else float('inf') # 限制batch数量 for batch_idx, (inputs, targets) in enumerate(trainloader): + if batch_idx >= max_batches: + break inputs, targets = inputs.to(self.device), targets.to(self.device) self.optimizer.zero_grad() outputs = self.w(inputs) @@ -41,10 +45,13 @@ class FTFSL(StreamAlgorithm): total+=targets.size(0) print('FTFSL Loss:', train_loss / (batch_idx + 1), 'Accuracy:', 100. * correct / total ) - def stream_evaluate(self, dataloader): + def stream_evaluate(self, dataloader, mode='demo'): test_loss = 0 + max_batches = 2 if mode == 'demo' else float('inf') # 限制batch数量 with torch.no_grad(): for batch_idx, (inputs, targets) in enumerate(dataloader): + if batch_idx >= max_batches: + break inputs, targets = inputs.to(self.device), targets.to(self.device) outputs = self.w(inputs) loss = self.criterion(outputs, targets) @@ -58,5 +65,5 @@ class FTFSL(StreamAlgorithm): self.metrics.append(self.stream_evaluate(stream_dataset.testloader)) self.stream_fit(stream_dataset.trainloader) - def test(self): + def test(self, data): print('FTFSL:', self.metrics) \ No newline at end of file diff --git a/StreamLearn/Algorithm/FTSL/FTSL.py b/StreamLearn/Algorithm/FTSL/FTSL.py index 9809026..4d23abd 100644 --- a/StreamLearn/Algorithm/FTSL/FTSL.py +++ b/StreamLearn/Algorithm/FTSL/FTSL.py @@ -2,7 +2,7 @@ import numpy as np import torch from torch import nn from .Opt_FD import Opt_FTSL -from StreamLearn.Base.ResNet import ResNet18 +from StreamLearn.Network.ResNetTTA import ResNet18 from StreamLearn.Base.SemiEstimator import StreamAlgorithm @@ -12,23 +12,27 @@ class FTSL(StreamAlgorithm): self.name = 'FTSL' self.T = args.T self.lr = args.lr + self.num_class = args.num_class self.tau_ratio = args.tau_ratio self.epsilon = args.epsilon self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.w = ResNet18() + self.w = ResNet18(self.num_class) if self.device == 'cuda': self.w = torch.nn.DataParallel(self.w) self.w = self.w.to(self.device) self.criterion = nn.CrossEntropyLoss() self.optimizer = Opt_FTSL(self.w.parameters(), lr=self.lr, epsilon=self.epsilon, tau_ratio=self.tau_ratio) - def stream_fit(self, trainloader): + def stream_fit(self, trainloader, mode='demo'): self.w.train() train_loss=0 correct=0 total=0 + max_batches = 2 if mode == 'demo' else float('inf') # 限制batch数量 for batch_idx, (inputs, targets) in enumerate(trainloader): + if batch_idx >= max_batches: + break inputs, targets = inputs.to(self.device), targets.to(self.device) self.optimizer.zero_grad() outputs = self.w(inputs) @@ -41,10 +45,13 @@ class FTSL(StreamAlgorithm): total+=targets.size(0) print('FTSL Loss:', train_loss / (batch_idx + 1), 'Accuracy:', 100. * correct / total ) - def stream_evaluate(self, dataloader): + def stream_evaluate(self, dataloader, mode='demo'): test_loss = 0 + max_batches = 2 if mode == 'demo' else float('inf') # 限制batch数量 with torch.no_grad(): for batch_idx, (inputs, targets) in enumerate(dataloader): + if batch_idx >= max_batches: + break inputs, targets = inputs.to(self.device), targets.to(self.device) outputs = self.w(inputs) loss = self.criterion(outputs, targets) @@ -58,5 +65,5 @@ class FTSL(StreamAlgorithm): self.metrics.append(self.stream_evaluate(stream_dataset.testloader)) self.stream_fit(stream_dataset.trainloader) - def test(self): + def test(self, data): print('FTSL:', self.metrics) \ No newline at end of file diff --git a/StreamLearn/Algorithm/GDRO/GDRO.py b/StreamLearn/Algorithm/GDRO/GDRO.py index 266178b..2f6f3d4 100644 --- a/StreamLearn/Algorithm/GDRO/GDRO.py +++ b/StreamLearn/Algorithm/GDRO/GDRO.py @@ -2,7 +2,7 @@ import numpy as np import torch from torch import nn, optim -from StreamLearn.Base.ResNet import ResNet18 +from StreamLearn.Network.ResNetTTA import ResNet18 from StreamLearn.Base.SemiEstimator import StreamAlgorithm @@ -12,11 +12,12 @@ class GDRO(StreamAlgorithm): self.name = 'GDRO' self.T = args.T self.m = args.m + self.num_class = args.num_class self.lr = args.lr_GDRO # Parameters self.eta_q = args.eta_GDRO self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.w = ResNet18() + self.w = ResNet18(self.num_class) self.w = self.w.to(self.device) if self.device == 'cuda': self.w = torch.nn.DataParallel(self.w) diff --git a/StreamLearn/Algorithm/GDRO/WGDRO.py b/StreamLearn/Algorithm/GDRO/WGDRO.py index 2583c43..40dee73 100644 --- a/StreamLearn/Algorithm/GDRO/WGDRO.py +++ b/StreamLearn/Algorithm/GDRO/WGDRO.py @@ -2,7 +2,7 @@ import copy import numpy as np import torch from torch import nn, optim -from StreamLearn.Base.ResNet import ResNet18 +from StreamLearn.Network.ResNetTTA import ResNet18 from StreamLearn.Base.SemiEstimator import StreamAlgorithm @@ -11,6 +11,7 @@ class WGDRO(StreamAlgorithm): self.args = args self.name = 'WGDRO' self.m = args.m + self.num_class = args.num_class self.lr = args.lr_WGDRO self.n = args.get_n self.nm = np.min(self.n) @@ -18,8 +19,8 @@ class WGDRO(StreamAlgorithm): # Parameters self.eta_q = args.eta_GDRO self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.w = ResNet18() - self.w_prime = ResNet18() + self.w = ResNet18(self.num_class) + self.w_prime = ResNet18(self.num_class) self.w = self.w.to(self.device) self.w_prime = self.w.to(self.device) if self.device == 'cuda': diff --git a/StreamLearn/Algorithm/MERO/MERO.py b/StreamLearn/Algorithm/MERO/MERO.py index f71607b..495ce5b 100644 --- a/StreamLearn/Algorithm/MERO/MERO.py +++ b/StreamLearn/Algorithm/MERO/MERO.py @@ -1,7 +1,7 @@ import numpy as np import torch from torch import nn, optim -from StreamLearn.Base.ResNet import ResNet18 +from StreamLearn.Network.ResNetTTA import ResNet18 from StreamLearn.Base.SemiEstimator import StreamAlgorithm @@ -11,13 +11,14 @@ class MERO(StreamAlgorithm): self.name = 'MERO' self.T = args.T self.m = args.m + self.num_class = args.num_class self.lr = args.lr_MERO # Parameters self.eta_q = args.eta_MERO self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.w = ResNet18() + self.w = ResNet18(self.num_class) self.w = self.w.to(self.device) - self.w_star = {i: ResNet18().to(self.device) for i in range(self.m)} + self.w_star = {i: ResNet18(self.num_class).to(self.device) for i in range(self.m)} if self.device == 'cuda': self.w = torch.nn.DataParallel(self.w) self.q = torch.ones(self.m) / self.m diff --git a/StreamLearn/Algorithm/MERO/WMERO.py b/StreamLearn/Algorithm/MERO/WMERO.py index 4595951..adc2675 100644 --- a/StreamLearn/Algorithm/MERO/WMERO.py +++ b/StreamLearn/Algorithm/MERO/WMERO.py @@ -2,7 +2,7 @@ import copy import numpy as np import torch from torch import nn, optim -from StreamLearn.Base.ResNet import ResNet18 +from StreamLearn.Network.ResNetTTA import ResNet18 from StreamLearn.Base.SemiEstimator import StreamAlgorithm @@ -11,6 +11,7 @@ class WMERO(StreamAlgorithm): self.args = args self.name = 'WMERO' self.m = args.m + self.num_class = args.num_class self.lr = args.lr_WMERO self.n = args.get_n self.nm = np.min(self.n) @@ -18,11 +19,11 @@ class WMERO(StreamAlgorithm): # Parameters self.eta_q = args.eta_WMERO self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.w = ResNet18() - self.w_prime = ResNet18() + self.w = ResNet18(self.num_class) + self.w_prime = ResNet18(self.num_class) self.w = self.w.to(self.device) self.w_prime = self.w.to(self.device) - self.w_star = {i: ResNet18().to(self.device) for i in range(self.m)} + self.w_star = {i: ResNet18(self.num_class).to(self.device) for i in range(self.m)} if self.device == 'cuda': self.w = torch.nn.DataParallel(self.w) self.q = torch.ones(self.m) / self.m diff --git a/StreamLearn/Base/ResNet.py b/StreamLearn/Base/ResNet.py deleted file mode 100644 index b77694c..0000000 --- a/StreamLearn/Base/ResNet.py +++ /dev/null @@ -1,132 +0,0 @@ -'''ResNet in PyTorch. - -For Pre-activation ResNet, see 'preact_resnet.py'. - -Reference: -[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun - Deep Residual Learning for Image Recognition. arXiv:1512.03385 -''' -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class BasicBlock(nn.Module): - expansion = 1 - - def __init__(self, in_planes, planes, stride=1): - super(BasicBlock, self).__init__() - self.conv1 = nn.Conv2d( - in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, - stride=1, padding=1, bias=False) - self.bn2 = nn.BatchNorm2d(planes) - - self.shortcut = nn.Sequential() - if stride != 1 or in_planes != self.expansion*planes: - self.shortcut = nn.Sequential( - nn.Conv2d(in_planes, self.expansion*planes, - kernel_size=1, stride=stride, bias=False), - nn.BatchNorm2d(self.expansion*planes) - ) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.bn2(self.conv2(out)) - out += self.shortcut(x) - out = F.relu(out) - return out - - -class Bottleneck(nn.Module): - expansion = 4 - - def __init__(self, in_planes, planes, stride=1): - super(Bottleneck, self).__init__() - self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, - stride=stride, padding=1, bias=False) - self.bn2 = nn.BatchNorm2d(planes) - self.conv3 = nn.Conv2d(planes, self.expansion * - planes, kernel_size=1, bias=False) - self.bn3 = nn.BatchNorm2d(self.expansion*planes) - - self.shortcut = nn.Sequential() - if stride != 1 or in_planes != self.expansion*planes: - self.shortcut = nn.Sequential( - nn.Conv2d(in_planes, self.expansion*planes, - kernel_size=1, stride=stride, bias=False), - nn.BatchNorm2d(self.expansion*planes) - ) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = F.relu(self.bn2(self.conv2(out))) - out = self.bn3(self.conv3(out)) - out += self.shortcut(x) - out = F.relu(out) - return out - - -class ResNet(nn.Module): - def __init__(self, block, num_blocks, num_classes=10): - super(ResNet, self).__init__() - self.in_planes = 64 - - self.conv1 = nn.Conv2d(3, 64, kernel_size=3, - stride=1, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(64) - self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) - self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) - self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) - self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) - self.linear = nn.Linear(512*block.expansion, num_classes) - - def _make_layer(self, block, planes, num_blocks, stride): - strides = [stride] + [1]*(num_blocks-1) - layers = [] - for stride in strides: - layers.append(block(self.in_planes, planes, stride)) - self.in_planes = planes * block.expansion - return nn.Sequential(*layers) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = self.layer4(out) - out = F.avg_pool2d(out, 4) - out = out.view(out.size(0), -1) - out = self.linear(out) - return out - - -def ResNet18(): - return ResNet(BasicBlock, [2, 2, 2, 2]) - - -def ResNet34(): - return ResNet(BasicBlock, [3, 4, 6, 3]) - - -def ResNet50(): - return ResNet(Bottleneck, [3, 4, 6, 3]) - - -def ResNet101(): - return ResNet(Bottleneck, [3, 4, 23, 3]) - - -def ResNet152(): - return ResNet(Bottleneck, [3, 8, 36, 3]) - - -def test(): - net = ResNet18() - y = net(torch.randn(1, 3, 32, 32)) - print(y.size()) - -# test() diff --git a/StreamLearn/Config/FTFSL.py b/StreamLearn/Config/FTFSL.py index a25a7c0..1ce8434 100644 --- a/StreamLearn/Config/FTFSL.py +++ b/StreamLearn/Config/FTFSL.py @@ -6,6 +6,7 @@ parser.add_argument('--lr', type=float, default=1) parser.add_argument('--tau_ratio', type=float, default=0.1) parser.add_argument('--epsilon', type=float, default=1) parser.add_argument('--batch', type=int, default=128) -parser.add_argument('--T', type=int, default=200) +parser.add_argument('--T', type=int, default=10) parser.add_argument('--run_time', type=int, default=10) +parser.add_argument('--num_class', type=float, default=10) args, unknown = parser.parse_known_args() diff --git a/StreamLearn/Config/FTSL.py b/StreamLearn/Config/FTSL.py index a25a7c0..1ce8434 100644 --- a/StreamLearn/Config/FTSL.py +++ b/StreamLearn/Config/FTSL.py @@ -6,6 +6,7 @@ parser.add_argument('--lr', type=float, default=1) parser.add_argument('--tau_ratio', type=float, default=0.1) parser.add_argument('--epsilon', type=float, default=1) parser.add_argument('--batch', type=int, default=128) -parser.add_argument('--T', type=int, default=200) +parser.add_argument('--T', type=int, default=10) parser.add_argument('--run_time', type=int, default=10) +parser.add_argument('--num_class', type=float, default=10) args, unknown = parser.parse_known_args() diff --git a/StreamLearn/Config/GDRO.py b/StreamLearn/Config/GDRO.py index 4fbc977..78dd26d 100644 --- a/StreamLearn/Config/GDRO.py +++ b/StreamLearn/Config/GDRO.py @@ -11,4 +11,5 @@ parser.add_argument('--eta_WGDRO', type=float, default=0.0001) parser.add_argument('--get_n', type=int, default=100) parser.add_argument('--T', type=int, default=100000) parser.add_argument('--run_time', type=int, default=10) +parser.add_argument('--num_class', type=float, default=10) args, unknown = parser.parse_known_args() diff --git a/StreamLearn/Config/MERO.py b/StreamLearn/Config/MERO.py index 28bd72e..9935603 100644 --- a/StreamLearn/Config/MERO.py +++ b/StreamLearn/Config/MERO.py @@ -11,5 +11,6 @@ parser.add_argument('--eta_WMERO', type=float, default=0.00005) parser.add_argument('--get_n', type=int, default=100) parser.add_argument('--T', type=int, default=100000) parser.add_argument('--run_time', type=int, default=20) +parser.add_argument('--num_class', type=float, default=10) args, unknown = parser.parse_known_args() diff --git a/StreamLearn/Config/PAA.py b/StreamLearn/Config/PAA.py new file mode 100644 index 0000000..2fb1a29 --- /dev/null +++ b/StreamLearn/Config/PAA.py @@ -0,0 +1,20 @@ +import argparse + +parser = argparse.ArgumentParser() + +parser.add_argument('--dataset_mode', type=str, default='balance', help='[balance, imbalance]') +parser.add_argument('--m', type=float, default=10) +parser.add_argument('--num_classes', type=int, default=10) +parser.add_argument('--buffer_size', type=int, default=2000) +parser.add_argument('--ema_alpha', type=float, default=0.95) +parser.add_argument('--lr_paa', type=float, default=0.001) +parser.add_argument('--lambda1', type=float, default=0.8) +parser.add_argument('--lambda2', type=float, default=0.5) +parser.add_argument('--knn_k', type=int, default=5) +parser.add_argument('--temp1', type=float, default=0.5) +parser.add_argument('--batch', type=int, default=10) +parser.add_argument('--get_n', type=int, default=100) +parser.add_argument('--T', type=int, default=100000) +parser.add_argument('--run_time', type=int, default=20) + +args, unknown = parser.parse_known_args() \ No newline at end of file diff --git a/StreamLearn/legacy/README.md b/StreamLearn/legacy/README.md index 8afed62..791722d 100644 --- a/StreamLearn/legacy/README.md +++ b/StreamLearn/legacy/README.md @@ -7,6 +7,8 @@ ## 算法介绍 - **GDRO & WGDRO**:课题一的GDRO算法及其加权版本,实现同质噪声下吞吐量相同和不同情况下的学习。 - **MERO & WMERO**:课题一的MERO算法及其加权版本,实现异质噪声下吞吐量相同和不同情况下的学习。 +- **FTSL & FTFSL**:课题一的FTSL算法及其加速版本,实现时空高效的自适应学习。 +- **PAA**:课题一的PAA算法,实现在线类增量持续学习。 - **无监督分类**:课题二中的矩阵略图近似优化算法,针对带噪声的CIFAR-10图像分类任务。 - **SAFC**:课题三中的增量学习算法,分为准备阶段(P阶段)和适应阶段(A阶段)。 - **流数据调度**:课题四中的流数据学习算法调度模拟环境,优化流数据任务的资源利用率和时效性。 @@ -61,16 +63,14 @@ def train_and_evaluate_stream_algorithm(): ## 课题一:流数据分布鲁棒学习算法 -### 1.1 分布鲁棒学习概况 +### 1.1 分布鲁棒学习算法 -针对多源异质流数据分布场景,提出了对分布变化鲁棒的算法。具体而言,针对多数据分布吞吐量相同和不同的两种情况,分别设计了 GDRO 算法和加权 GDRO (WGDRO) 算法;进一步地,我们针对异质噪声下的分布鲁棒学习提出了两种超额损失算法,分别为 MERO 算法和加权 MERO (WMERO) 算法。最后,建立了理论最优的样本复杂度、并通过实验验证了这四种方法的有效性。 +针对多源异质流数据中的分布变化场景,我们提出了适应性强的鲁棒算法,以应对流数据分布变化带来的挑战。具体而言,针对多数据分布吞吐量相同和不同的两种情况,分别设计了 GDRO (Group Distributionally Robust Optimization) 算法和 WGDRO (Weighted GDRO) 算法。最后,建立了理论最优的样本复杂度。 数据集:CIFAR-10 源数据处理参考Github([Link][https://github.com/kuangliu/pytorch-cifar/tree/master]),预处理后数据 ([Link][https://pan.baidu.com/s/1kkgJsbhnLkShwlFncTIh3w?pwd=rne7],提取码:rne7),文件下载后解压到 `stream-learn\StreamLearn\Dataset\cifar10_1` 目录下。 数据预处理方法:根据每幅图片的HSV直方图,对图片进行聚类,类别数取为10。 -### 1.2 分布鲁棒学习算法 - 本算法包含`多分布数据集构造`,`GDRO 算法和 WGDRO 算法实现`,`性能测试`三部分,相关代码参见目录: - StreamLearn/Dataset/CIFAR10_Dataset.py @@ -129,9 +129,9 @@ def train_and_evaluate_stream_WGDRO(args): print('WGDRO:', metrics) ``` -### 1.3 分布鲁棒超额损失优化算法 +### 1.2 分布鲁棒超额损失优化算法 -本算法包含`多分布数据集构造`,`MERO 算法和 WMERO 算法实现`,`性能测试`三部分,相关代码参见目录: +我们针对异质噪声下的分布鲁棒学习提出了两种超额损失算法,分别为 MERO (Minimax Excess Risk Optimization) 算法和 WMERO (Weighted MERO) 算法。最后,建立了理论最优的样本复杂度。本算法包含`多分布数据集构造`,`MERO 算法和 WMERO 算法实现`,`性能测试`三部分,相关代码参见目录: - StreamLearn/Dataset/CIFAR10_Dataset.py - StreamLearn/Algorithm/MERO/MERO.py @@ -148,7 +148,7 @@ elif args.dataset_mode == 'imbalance': train_and_evaluate_stream_WMERO(args) ``` -我们按照与1.2节相同的做法构造流式数据集CIFAR10_Dataset,其中参数 `args.dataset_mode='balance' `表明流数据吞吐量相同场景;`args.dataset_mode='imbalance'` 表明吞吐量不同的场景。 +我们按照与1.2节相同的做法构造流式数据集 CIFAR10_Dataset,其中参数 `args.dataset_mode='balance' `表明流数据吞吐量相同场景;`args.dataset_mode='imbalance'` 表明吞吐量不同的场景。 ```python dataset = CIFAR10_Dataset(args) @@ -191,7 +191,135 @@ def train_and_evaluate_stream_WMERO(args): print('WMERO:', metrics) ``` +### 1.3 时空高效的自适应梯度下降算法 + +我们针对流数据场景提出了分布自适应梯度下降算法的两种高效变体,分别为 FTSL (Follow the Sketchy Leader) 算法和 FTFSL (Follow the Fast Sketchy Leader) 算法。最后,建立了维度无关的遗憾界保障。本算法包含`数据集构造`,`FTSL 算法和 FTFSL 算法实现`,`性能测试`三部分,相关代码参见目录: + +- StreamLearn/Dataset/CIFAR10_Dataset.py +- StreamLearn/Algorithm/FTSL/FTSL.py +- StreamLearn/Algorithm/FTSL/FTFSL.py +- StreamLearn/tests/test_FTSL.py + +我们可以分别调用 FTSL 算法和 FTFSL 算法的测试函数,在 CIFAR10_Dataset 上进行数据性能测试: + +```python +# FTSL 算法和 FTFSL 算法的测试代码 +if args.dataset_mode == 'balance': + train_and_evaluate_stream_FTSL(args) / train_and_evaluate_stream_FTFSL(args) +``` + +我们按照与1.2节相同的做法构造流式数据集 CIFAR10_Dataset,其中参数`args.dataset_mode='balance' `表明流数据吞吐量相同场景;之后,我们对数据进行划分,使得数据按照批次到来: + +```python +dataset = CIFAR10_Dataset(args) +dataset.data_loader() +``` + +接着,我们对算法进行初始化,算法使用 ResNet18 神经网络: + +```python +alg = FTSL(args) / alg = FTFSL(args) +``` + +最后,分别对两个模型进行性能测试,完整代码如下: + +```python +# FTSL 算法测试 +def train_and_evaluate_stream_FTSL(args): + metrics = [] + for _ in range(args.run_time): + dataset = CIFAR10_Dataset(args) + dataset.data_loader() + alg = FTSL(args) + loss_t=[] + for __ in range(args.T): + loss_t.append(alg.stream_evaluate(dataset.testloader)) + alg.stream_fit(dataset.trainloader) + metrics.append(loss_t) +``` + +```python +# FTFSL 算法测试 +def train_and_evaluate_stream_FTFSL(args): + metrics = [] + for _ in range(args.run_time): + dataset = CIFAR10_Dataset(args) + dataset.data_loader() + alg = FTFSL(args) + loss_t=[] + for __ in range(args.T): + loss_t.append(alg.stream_evaluate(dataset.testloader)) + alg.stream_fit(dataset.trainloader) + metrics.append(loss_t) +``` + +### 1.4 在线类增量持续学习算法 + +PAA(Prototypes as Anchors)是一种针对在线类增量持续学习(CIL)的流数据学习算法,主要解决动态数据流中的标签噪声(包括闭集噪声和开集噪声)和类别增量问题。其核心思想是: + +1. 原型学习:为每个类别学习具有代表性和判别性的原型(类中心),作为区分样本的 “锚点”; +2. 噪声处理:通过高斯混合模型(GMM)和 KNN 评分实现基于相似度的去噪,过滤开集噪声并修正高置信度的闭集噪声样本; +3. 鲁棒性增强:采用双分类器架构和一致性正则化,确保模型对分布漂移的适应性; +4. 增量适应:通过记忆缓冲区保存历史样本,结合原型对比损失平衡新旧类别知识,缓解灾难性遗忘。 + +代码目录如下:该算法主要包含`数据集`,`PAA算法实现`,`性能测试`三部分,相关代码参见目录: + +- StreamLearn/Algorithm/PAA/PAA.py:算法主逻辑; +- StreamLearn/Algorithm/PAA/paa_model.py:模型结构(ResNet backbone + 双分类器); +- StreamLearn/Algorithm/PAA/paa_buffer.py:缓冲区管理(原型更新与噪声过滤); +- StreamLearn/Algorithm/PAA/paa_proto.py:在线原型学习(EMA 更新与对比损失); +- StreamLearn/Algorithm/PAA/loss.py:损失函数(原型对比损失、一致性损失); +- StreamLearn/Dataset/CIFAR10_Dataset.py:流式数据集构造(支持噪声注入); +- StreamLearn/tests/test_paa.py:性能测试入口。 + +我们介绍重要参数如下 + +| 参数描述 | 参数值 | +| ---------------------------- | ------------------ | +| 记忆缓冲区大小 | `buffer_size=2000` | +| 学习率 | `lr_paa=0.001` | +| 原型 EMA 平滑因子 | `ema_alpha=0.95` | +| GMM 后验概率阈值(去噪) | `lambda1=0.8` | +| KNN 距离阈值(开集噪声过滤) | `lambda2=0.5` | +| 总类别数 | `num_classes=10` | +| 原型对比损失温度参数 | `temp1=0.5` | + +我们可以调用 PAA 算法的测试函数,在 CIFAR10_Dataset 上进行数据性能测试: +首先构造流式数据集 CIFAR10_Dataset + +```python +dataset = CIFAR10_Dataset(args) +``` + +接着使用 ResNet18 神经网络初始化 PAA 算法。 + +```python +alg = PAA(args) +``` + +最后对模型进行性能测试,完整代码如下: + +```python +# PAA 算法测试 +def train_and_evaluate_stream_PAA(args): + metrics = [] + for _ in range(args.run_time): + dataset = CIFAR10_Dataset(args) + dataset.data_loader() + alg = PAA(args) + loss_t = [] + for __ in range(args.T): + loss_t.append(alg.stream_evaluate(dataset.testloader)) + alg.stream_fit(dataset.trainloader) + metrics.append(loss_t) + print('PAA metrics:', metrics) +``` + +常见问题及解决方案: +1. **数据加载错误 `'DataLoader' object is not subscriptable`**:确保 `stream_evaluate` 和 `stream_fit` 接收单批次数据,而非 DataLoader 对象,需遍历 DataLoader 获取批次; +2. **参数不匹配**:初始化 `PAA` 类时需传入正确的 `args` 参数,如 `num_classes` 与数据集类别数一致; +3. **原型为空警告**:检查缓冲区是否正确更新,确保 `add_sample_with_prototype` 方法正常写入原型。 ## 课题二 diff --git a/StreamLearn/tests/Demo_Task_1.py b/StreamLearn/tests/Demo_Task_1.py new file mode 100644 index 0000000..df193d0 --- /dev/null +++ b/StreamLearn/tests/Demo_Task_1.py @@ -0,0 +1,128 @@ +# -*- coding:UTF-8 -*- # +''' +@filename:Demo_Task_1.pY +@author:howemin +@time:2025-07-22 +''' +import logging + +import numpy as np +import argparse +import random +from StreamLearn.tests.test_GDRO import train_and_evaluate_stream_GDRO +from StreamLearn.tests.test_GDRO import train_and_evaluate_stream_WGDRO +from StreamLearn.tests.test_MERO import train_and_evaluate_stream_MERO +from StreamLearn.tests.test_MERO import train_and_evaluate_stream_WMERO +from StreamLearn.tests.test_FTSL import train_and_evaluate_stream_FTSL +from StreamLearn.tests.test_FTSL import train_and_evaluate_stream_FTFSL +from StreamLearn.tests.test_paa import train_and_evaluate_stream_PAA +import torch +from StreamLearn.Dataset.CIFAR10_Dataset import CIFAR10_Dataset + + +def set_seed(seed): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--dataset_mode', type=str, default='balance', help='[balance, imbalance]') + parser.add_argument('--m', type=float, default=10) + parser.add_argument('--num_class', type=int, default=10) + parser.add_argument('--batch', type=float, default=100) + parser.add_argument('--get_n', type=int, default=100) + parser.add_argument('--T', type=int, default=100000) + parser.add_argument('--run_time', type=int, default=10) + # GDRO + parser.add_argument('--lr_GDRO', type=float, default=0.005) + parser.add_argument('--eta_GDRO', type=float, default=0.001) + parser.add_argument('--lr_WGDRO', type=float, default=0.0005) + parser.add_argument('--eta_WGDRO', type=float, default=0.0001) + # MERO + parser.add_argument('--lr_MERO', type=float, default=0.005) + parser.add_argument('--eta_MERO', type=float, default=0.0001) + parser.add_argument('--lr_WMERO', type=float, default=0.0001) + parser.add_argument('--eta_WMERO', type=float, default=0.00005) + # FTSL + parser.add_argument('--lr', type=float, default=0.1) + parser.add_argument('--tau_ratio', type=float, default=0.1) + parser.add_argument('--epsilon', type=float, default=0.01) + # PAA + parser.add_argument('--num_classes', type=int, default=10) + parser.add_argument('--buffer_size', type=int, default=300) + parser.add_argument('--ema_alpha', type=float, default=0.95) + parser.add_argument('--lr_paa', type=float, default=0.001) + parser.add_argument('--lambda1', type=float, default=0.8) + parser.add_argument('--lambda2', type=float, default=0.5) + parser.add_argument('--knn_k', type=int, default=5) + parser.add_argument('--temp1', type=float, default=0.5) + + + args = parser.parse_args() + set_seed(2023) + + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s" + ) + # GDRO + args.dataset_mode = 'balance' + logging.info("开始运行第一组算法:") + logging.info("(1) 针对多数据分布吞吐量相同情况,运行分布鲁棒优化 GDRO 算法") + train_and_evaluate_stream_GDRO(args) + logging.info("GDRO 算法运行完毕") + + args.dataset_mode = 'imbalance' + logging.info("(2) 针对多数据分布吞吐量不同情况,运行加权分布鲁棒优化 WGDRO 算法") + train_and_evaluate_stream_WGDRO(args) + logging.info("WGDRO 算法运行完毕") + logging.info("第一组算法 GDRO/WGDRO 全部运行完毕") + + # MERO + args.dataset_mode = 'balance' + logging.info("开始运行第二组算法:") + logging.info("(1) 针对异质噪声下多数据分布吞吐量相同情况,运行极小极大超额风险优化 MERO 算法") + train_and_evaluate_stream_MERO(args) + logging.info("MERO 算法运行完毕") + + args.dataset_mode = 'imbalance' + logging.info("(2) 针对异质噪声下多数据分布吞吐量不同情况,运行加权极小极大超额风险优化 WMERO 算法") + train_and_evaluate_stream_WMERO(args) + logging.info("WMERO 算法运行完毕") + logging.info("第二组算法 MERO/WMERO 全部运行完毕") + + # FTSL + args.dataset_mode = 'balance' + args.batch = 256 + args.T = 5 + args.run_time = 1 + logging.info("开始运行第三组算法:") + logging.info("(1) 针对分布不断变化的情况,运行高效的自适应梯度下降 FTSL 算法") + train_and_evaluate_stream_FTSL(args) + logging.info("FTSL 算法运行完毕") + + torch.cuda.empty_cache() + logging.info("(2) 针对分布快速变化的情况,运行加速自适应梯度下降 FTFSL 算法") + train_and_evaluate_stream_FTFSL(args) + logging.info("FTFSL 算法运行完毕") + logging.info("第三组算法 FTSL/FTFSL 全部运行完毕") + + # PAA + args.batch = 2 + args.T = 5 + args.run_time = 1 + logging.info("开始运行第四组算法:") + logging.info("针对动态数据流中的类别增量问题,运行在线类增量持续学习 PAA 算法") + train_and_evaluate_stream_PAA(args) + logging.info("PAA 算法运行完毕") + + + + + + +if __name__ == '__main__': + main() diff --git a/StreamLearn/tests/test_FTSL.py b/StreamLearn/tests/test_FTSL.py index 468d214..e6d9317 100644 --- a/StreamLearn/tests/test_FTSL.py +++ b/StreamLearn/tests/test_FTSL.py @@ -39,6 +39,7 @@ def train_and_evaluate_stream_FTFSL(args): def main(): parser = argparse.ArgumentParser() parser.add_argument('--dataset_mode', type=str, default='balance', help='[balance, imbalance]') + parser.add_argument('--num_class', type=float, default=10) parser.add_argument('--lr', type=float, default=0.1) parser.add_argument('--tau_ratio', type=float, default=0.1) parser.add_argument('--epsilon', type=float, default=0.01) diff --git a/StreamLearn/tests/test_GDRO.py b/StreamLearn/tests/test_GDRO.py index 66f7dc7..f8a6b6b 100644 --- a/StreamLearn/tests/test_GDRO.py +++ b/StreamLearn/tests/test_GDRO.py @@ -43,6 +43,7 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument('--dataset_mode', type=str, default='balance', help='[balance, imbalance]') parser.add_argument('--m', type=float, default=10) + parser.add_argument('--num_class', type=float, default=10) parser.add_argument('--lr_GDRO', type=float, default=0.005) parser.add_argument('--eta_GDRO', type=float, default=0.001) parser.add_argument('--batch', type=float, default=100) diff --git a/StreamLearn/tests/test_MERO.py b/StreamLearn/tests/test_MERO.py index 99b4b15..c423c72 100644 --- a/StreamLearn/tests/test_MERO.py +++ b/StreamLearn/tests/test_MERO.py @@ -43,6 +43,7 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument('--dataset_mode', type=str, default='imbalance', help='[balance, imbalance]') parser.add_argument('--m', type=float, default=10) + parser.add_argument('--num_class', type=float, default=10) parser.add_argument('--batch', type=float, default=100) parser.add_argument('--lr_MERO', type=float, default=0.005) parser.add_argument('--eta_MERO', type=float, default=0.0001) -- Gitee From fbb1a983795265130d7484731edce70413bc943c Mon Sep 17 00:00:00 2001 From: Howemin Date: Fri, 25 Jul 2025 15:30:53 +0800 Subject: [PATCH 2/3] Delete PAA --- StreamLearn/Config/PAA.py | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100644 StreamLearn/Config/PAA.py diff --git a/StreamLearn/Config/PAA.py b/StreamLearn/Config/PAA.py deleted file mode 100644 index 2fb1a29..0000000 --- a/StreamLearn/Config/PAA.py +++ /dev/null @@ -1,20 +0,0 @@ -import argparse - -parser = argparse.ArgumentParser() - -parser.add_argument('--dataset_mode', type=str, default='balance', help='[balance, imbalance]') -parser.add_argument('--m', type=float, default=10) -parser.add_argument('--num_classes', type=int, default=10) -parser.add_argument('--buffer_size', type=int, default=2000) -parser.add_argument('--ema_alpha', type=float, default=0.95) -parser.add_argument('--lr_paa', type=float, default=0.001) -parser.add_argument('--lambda1', type=float, default=0.8) -parser.add_argument('--lambda2', type=float, default=0.5) -parser.add_argument('--knn_k', type=int, default=5) -parser.add_argument('--temp1', type=float, default=0.5) -parser.add_argument('--batch', type=int, default=10) -parser.add_argument('--get_n', type=int, default=100) -parser.add_argument('--T', type=int, default=100000) -parser.add_argument('--run_time', type=int, default=20) - -args, unknown = parser.parse_known_args() \ No newline at end of file -- Gitee From 5b33299c37d8eff120be9d2841b31a34ef552c36 Mon Sep 17 00:00:00 2001 From: Howemin Date: Fri, 25 Jul 2025 15:34:51 +0800 Subject: [PATCH 3/3] Update Readme --- StreamLearn/legacy/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/StreamLearn/legacy/README.md b/StreamLearn/legacy/README.md index 791722d..cc16b97 100644 --- a/StreamLearn/legacy/README.md +++ b/StreamLearn/legacy/README.md @@ -148,7 +148,7 @@ elif args.dataset_mode == 'imbalance': train_and_evaluate_stream_WMERO(args) ``` -我们按照与1.2节相同的做法构造流式数据集 CIFAR10_Dataset,其中参数 `args.dataset_mode='balance' `表明流数据吞吐量相同场景;`args.dataset_mode='imbalance'` 表明吞吐量不同的场景。 +我们按照与1.1节相同的做法构造流式数据集 CIFAR10_Dataset,其中参数 `args.dataset_mode='balance' `表明流数据吞吐量相同场景;`args.dataset_mode='imbalance'` 表明吞吐量不同的场景。 ```python dataset = CIFAR10_Dataset(args) @@ -208,7 +208,7 @@ if args.dataset_mode == 'balance': train_and_evaluate_stream_FTSL(args) / train_and_evaluate_stream_FTFSL(args) ``` -我们按照与1.2节相同的做法构造流式数据集 CIFAR10_Dataset,其中参数`args.dataset_mode='balance' `表明流数据吞吐量相同场景;之后,我们对数据进行划分,使得数据按照批次到来: +我们按照与1.1节相同的做法构造流式数据集 CIFAR10_Dataset,其中参数`args.dataset_mode='balance' `表明流数据吞吐量相同场景;之后,我们对数据进行划分,使得数据按照批次到来: ```python dataset = CIFAR10_Dataset(args) -- Gitee