# %%
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
# from torch.cuda.amp import GradScaler, autocast
from torch.amp.autocast_mode import autocast
# %%
# ! Reference: https://github.com/chunhuizhang/pytorch_distribute_tutorials/blob/main/tutorials/01_multi_gpus_data_parallelism.ipynb
# %%
class models(nn.Module):
    def __init__(self, input_dim:int, hidden_dim:int, output_dim:int):
        super(models, self).__init__()
        self.input = nn.Linear(input_dim, hidden_dim)
        self.output = nn.Linear(hidden_dim, output_dim)
    def forward(self, x:torch.Tensor):
        # Under nn.DataParallel, forward runs once per replica, so these prints
        # show which GPU each replica sits on and how the batch gets split.
        print(f"For Forward-Input Device:{x.device}")
        print(f"For Forward-Input Size:{x.size()}")
        return self.output(self.input(x))
# %%
class dataset(Dataset):
    def __init__(self, rowdim:int, input_dim:int):
        super(dataset, self).__init__()
        self.data = torch.rand(rowdim, input_dim)
        self.label = torch.randint(0, 2, (rowdim,))
    def __getitem__(self, idx):
        return self.data[idx], self.label[idx]
    def __len__(self):
        return len(self.data)
# %%
rowdim = 64
input_dim = 32
hidden_dim = 32
output_dim = 1
batchsize = 32
# %%
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
if device.type == 'cuda':
    num_gpus = torch.cuda.device_count()
    print(f"Single Node Has GPU Number:{num_gpus}!")
else:
    print("CUDA not found!")
# %%
net = models(input_dim, hidden_dim, output_dim)
datasets = dataset(rowdim, input_dim)
dataload = DataLoader(datasets, batch_size=batchsize, num_workers=4, shuffle=True)
# %%
# ! Note: torch.Tensor.to('cuda') copies the data: it returns a new tensor, so the variable points at new storage.
# e.g.
a = torch.rand(2,3)
print(f"Device a:{a.is_cuda}") # Device a:False
b = a.to(device)
print(f"Device a:{a.is_cuda}") # Device a:False
print(f"Device b:{b.is_cuda}") # Device b:False
# %%
# ! Does the same apply to the model, i.e. torch.nn.Module.to('cuda:0')? No: for a module, .to() moves the parameters in place (and also returns the module).
print(f"Device net:{next(net.parameters()).device}") # Device net:cpu
net_new = net.to(device)
print(f"Device net:{next(net.parameters()).device}") # Device net:cuda:0
print(f"Device net_new:{next(net_new.parameters()).device}") # Device net_new:cuda:0
# %%
net = nn.DataParallel(net)
net.to(device)
# scater = GradScaler()
optimizers = torch.optim.Adam(params=net.parameters(), lr=0.001)
loss_fun = nn.BCEWithLogitsLoss()
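# %%
# Roughly what nn.DataParallel does on each forward call: replicate the module
# onto every GPU, scatter the batch along dim 0, run the replicas in parallel,
# then gather the outputs back onto the default device (gradients later flow
# back via a reduce onto that device). A hedged sketch using the
# torch.nn.parallel helpers directly; it only runs when more than one GPU is present.
if device.type == 'cuda' and torch.cuda.device_count() > 1:
    from torch.nn.parallel import replicate, scatter, parallel_apply, gather
    device_ids = list(range(torch.cuda.device_count()))
    sample = torch.rand(batchsize, input_dim).to(device)
    replicas = replicate(net.module, device_ids)        # one copy of the model per GPU
    chunks = scatter(sample, device_ids)                # split the batch along dim 0
    outputs = parallel_apply(replicas[:len(chunks)], chunks)
    merged = gather(outputs, device_ids[0])             # gather results on cuda:0
    print(f"Gathered output size:{merged.size()}")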
# %%
for idx, data in enumerate(dataload):
    optimizers.zero_grad()
    input = data[0]
    label = data[1]
    input = input.to(device)
    label = label.unsqueeze(-1).to(torch.float16).to(device)
    with autocast(device.type):
        output = net(input)
        print(f"Autocast Output Float:{output.dtype}")
        loss = loss_fun(output, label)
        print(f"Autocast Loss Float:{loss.dtype}")  # gradient accumulation happens in backward() below
    # scater.scale(loss).backward()
    # scater.step(optimizers)
    # scater.update()
    loss.backward()
    optimizers.step()
    # DataParallel's backward uses the Reduce communication primitive to merge the replicas' gradients onto the default device
    print(f"For Loop-Input Size:{input.size()}, Output Size:{output.size()}")
    print(f"For Loop-Cuda Device Number:{input.device}")
# %%
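# The commented-out scaler lines above hint at the usual AMP recipe with loss
# scaling. A hedged sketch of the same loop with a GradScaler enabled, assuming
# PyTorch >= 2.3 for the torch.amp import (older versions can use the
# torch.cuda.amp import commented at the top) and a CUDA device (the scaler is
# effectively a no-op on CPU):
from torch.amp.grad_scaler import GradScaler

scaler = GradScaler(device.type)
for idx, data in enumerate(dataload):
    optimizers.zero_grad()
    input = data[0].to(device)
    label = data[1].unsqueeze(-1).to(torch.float16).to(device)
    with autocast(device.type):
        output = net(input)
        loss = loss_fun(output, label)
    scaler.scale(loss).backward()   # scale the loss to avoid fp16 gradient underflow
    scaler.step(optimizers)         # unscale gradients, then run the optimizer step
    scaler.update()                 # adjust the scale factor for the next iteration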