# %%
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
# %%
# ! Reference: https://github.com/chunhuizhang/pytorch_distribute_tutorials/blob/main/tutorials/04_model_parallel_resnet50.ipynb
# * Manual model parallelism: split a model across GPUs by hand
# %%
class models(nn.Module):
    def __init__(self, input_dim: int, hidden_dim: int, output_dim: int):
        super(models, self).__init__()
        # * Drawback of the manual split: the layers are pinned to fixed GPUs, which is
        # * inconvenient for later fine-tuning and has to be adjusted by hand
        # * (see the device-parameterized sketch in the next cell).
        self.input = nn.Linear(input_dim, hidden_dim).to('cuda:0')
        self.output = nn.Linear(hidden_dim, output_dim).to('cuda:1')

    def forward(self, x: torch.Tensor):
        print(f"For Forward-Input Device:{x.device}")  # For Forward-Input Device:cpu
        print(f"For Forward-Input Size:{x.size()}")    # For Forward-Input Size:torch.Size([32, 32])
        x = self.input(x.to('cuda:0'))
        return self.output(x.to('cuda:1'))
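# %%
# * A minimal sketch of a device-parameterized variant (the class and argument names below
# * are illustrative, not part of the original code): passing the target devices in as
# * arguments makes the manual split easier to move or reuse later.
class SplitModel(nn.Module):
    def __init__(self, input_dim: int, hidden_dim: int, output_dim: int,
                 dev0: str = 'cuda:0', dev1: str = 'cuda:1'):
        super().__init__()
        self.dev0, self.dev1 = dev0, dev1
        self.input = nn.Linear(input_dim, hidden_dim).to(dev0)
        self.output = nn.Linear(hidden_dim, output_dim).to(dev1)

    def forward(self, x: torch.Tensor):
        x = self.input(x.to(self.dev0))      # first stage on dev0
        return self.output(x.to(self.dev1))  # second stage on dev1

# e.g. SplitModel(32, 32, 1, dev0='cuda:0', dev1='cuda:1')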
# %%
net = models(32,32,1)
print(f"Net Input Cuda Device:{next(net.input.parameters()).device}") # Net Input Cuda Device:cuda:0
print(f"Net Output Cuda Device:{next(net.output.parameters()).device}") # Net Output Cuda Device:cuda:1
loss_fn = nn.BCEWithLogitsLoss()
optimizer = Adam(net.parameters(), lr=0.001)
# %%
optimizer.zero_grad()
output = net(torch.randn(32, 32))
print(f"Output Cuda Device:{output.device}") # Output Cuda Device:cuda:1
labels = torch.randint(0, 2, (32,)).unsqueeze(-1).to(torch.float32)  # target dtype must match the float32 logits for BCEWithLogitsLoss
# %%
loss = loss_fn(output, labels.to('cuda:1'))
print(f"Loss Cuda Device:{loss.device}") # Loss Cuda Device:cuda:1
# %%
loss.backward()
optimizer.step()
# %%
next(net.parameters()).device # device(type='cuda', index=0)
# %%
next(net.output.parameters()).device # device(type='cuda', index=1)
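# %%
# * Quick check (sketch): gradients are created on the same device as their parameters,
# * which is why a single Adam instance can step a model that is split across GPUs.
print(net.input.weight.grad.device)   # expected: cuda:0
print(net.output.weight.grad.device)  # expected: cuda:1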
# %%
import torch
import torch.nn as nn
from torchvision.models.resnet import ResNet, Bottleneck
# %%
model = ResNet(Bottleneck, [3, 4, 6, 3])  # standard ResNet-50 block layout
model
# %%
from torchsummary import summary
# %%
summary(model, input_size=(3, 128, 128), device='cpu')
# %%
class ModelParallelResNet50(ResNet):
    def __init__(self, num_classes=10):
        # [3, 4, 6, 3] is the ResNet-50 block layout
        super().__init__(Bottleneck, [3, 4, 6, 3], num_classes=num_classes)
        self.seq1 = nn.Sequential(
            self.conv1,
            self.bn1,
            self.relu,
            self.maxpool,
            self.layer1,
            self.layer2
        ).to('cuda:0')
        self.seq2 = nn.Sequential(
            self.layer3,
            self.layer4,
            self.avgpool
        ).to('cuda:1')
        self.fc.to('cuda:1')

    def forward(self, x):
        # * Move the intermediate activation from cuda:0 to cuda:1 between the two stages,
        # * then flatten and apply the classifier head that lives on cuda:1.
        x = self.seq2(self.seq1(x).to('cuda:1'))
        return self.fc(torch.flatten(x, 1))
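# %%
# * Sketch of a single forward pass through the split model (assumes two visible GPUs):
# * the input starts on cuda:0 and the logits come back on cuda:1.
# mp_net = ModelParallelResNet50()
# print(mp_net(torch.randn(2, 3, 128, 128).to('cuda:0')).shape)  # torch.Size([2, 10]), on cuda:1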
# %%
def model_size(model):
    return sum(par.numel() for par in model.parameters())
# %%
# model_size(ResNet(Bottleneck, [3, 4, 6, 3]))
# %%
# model_size(ModelParallelResNet50())
# %%
num_classes = 10
one_hot_indices = torch.LongTensor(5).random_(0, num_classes).view(5, 1)
one_hot_indices
# %%
labels = torch.zeros(5, num_classes).scatter_(1, one_hot_indices, 1)
labels
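# %%
# * Equivalent construction (sketch) with torch.nn.functional.one_hot instead of scatter_;
# * it produces the same one-hot matrix as the labels tensor above.
labels_alt = torch.nn.functional.one_hot(one_hot_indices.squeeze(1), num_classes=num_classes).float()
torch.equal(labels, labels_alt)  # True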
# %%
num_classes = 10
num_batches = 3
batch_size = 120
image_w = 128
image_h = 128
def train(model):
    model.train()
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # random one-hot targets, reused for every batch
    one_hot_indices = torch.LongTensor(batch_size).random_(0, num_classes).view(batch_size, 1)
    for _ in range(num_batches):
        inputs = torch.randn(batch_size, 3, image_w, image_h)
        labels = torch.zeros(batch_size, num_classes).scatter_(1, one_hot_indices, 1)
        optimizer.zero_grad()
        outputs = model(inputs.to('cuda:0'))
        # compute the loss on whichever device the outputs ended up on
        labels = labels.to(outputs.device)
        loss_fn(outputs, labels).backward()
        optimizer.step()
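# %%
# * A minimal sketch (assumes the two devices used above, cuda:0 and cuda:1): CUDA kernels
# * are launched asynchronously, so synchronizing both devices around train() makes the
# * timeit measurements below reflect the finished GPU work rather than just kernel launches.
def train_sync(model):
    torch.cuda.synchronize('cuda:0')
    torch.cuda.synchronize('cuda:1')
    train(model)
    torch.cuda.synchronize('cuda:0')
    torch.cuda.synchronize('cuda:1')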
# %%
import matplotlib.pyplot as plt
import numpy as np
import timeit
# %%
num_repeat = 10
stmt = 'train(model)'
setup = "model = ModelParallelResNet50()"
mp_run_times = timeit.repeat(stmt, setup, number=1, repeat=num_repeat, globals=globals())
mp_mean, mp_std = np.mean(mp_run_times), np.std(mp_run_times)
# %%
# Single-GPU baseline: the same architecture run entirely on cuda:0
import torchvision.models as models_res
setup = "model = models_res.resnet50(num_classes=num_classes).to('cuda:0')"
rn_run_times = timeit.repeat(stmt, setup, number=1, repeat=num_repeat, globals=globals())
rn_mean, rn_std = np.mean(rn_run_times), np.std(rn_run_times)
# %%
def plot(means, stds, labels, fig_name):
    fig, ax = plt.subplots()
    ax.bar(np.arange(len(means)), means, yerr=stds, align='center',
           alpha=0.5, ecolor='red', capsize=10, width=0.6)
    ax.set_ylabel('ResNet50 Execution Time (Second)')
    ax.set_xticks(np.arange(len(means)))
    ax.set_xticklabels(labels)
    ax.yaxis.grid(True)
    plt.tight_layout()
    plt.savefig(fig_name)
    plt.close(fig)
# %%
plot([mp_mean, rn_mean],
[mp_std, rn_std],
['Model Parallel', 'Single GPU',],
'mp_vs_rn.png')
# %%
mp_mean, rn_mean
# %%
(mp_mean - rn_mean) / rn_mean  # relative overhead of the model-parallel run vs. the single-GPU run
# %%