class torch.optim.Optimizer(
    params,
    defaults
)
For more information, see torch.optim.Optimizer.
class mindspore.nn.Optimizer(
    learning_rate,
    parameters,
    weight_decay=0.0,
    loss_scale=1.0
)
For more information, see mindspore.nn.Optimizer.
MindSpore: params can be passed in through the trainable_params interface.
from mindspore import nn
class Net(nn.Cell):
    def __init__(self):
        super(Net, self).__init__()
        self.conv = nn.Conv2d(3, 64, 3)
        self.bn = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()

    def construct(self, x):
        x = self.conv(x)
        x = self.bn(x)
        out = self.relu(x)
        return out
net = Net()
optim_sgd = nn.SGD(params=net.trainable_params())
PyTorch: params can be passed in through the parameters interface.
from torch import optim
import torch
import torch.nn as nn
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv = nn.Conv2d(3, 64, 3)
        self.bn = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x
net = Net()
optim_sgd = optim.SGD(params=net.parameters(), lr=0.01)
MindSpore: First, get all the parameters in the network through the get_parameters method, then filter them by some condition (for example, by parameter name), and pass the result to the optimizer.
from mindspore import nn
net = Net()
all_params = net.get_parameters()
no_conv_params = list(filter(lambda x: "conv" not in x.name, all_params))
optim_sgd = nn.SGD(no_conv_params)
PyTorch: First, get all the parameters in the network through the named_parameters method, then filter them by some condition (for example, by parameter name), and pass the result to the optimizer.
from torch import optim
net = Net()
all_params = net.named_parameters()
no_conv_params = []
for pname, p in all_params:
    if "conv" not in pname:
        no_conv_params.append(p)
optim_sgd = optim.SGD(no_conv_params, lr=0.01)
Fixed learning rate: the usage is the same in both frameworks.
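For example (a minimal sketch; net_ms and net_pt are assumed to be instances of the MindSpore Net and PyTorch Net defined above), a fixed learning rate is simply passed as a float in both frameworks:

from mindspore import nn
from torch import optim

# MindSpore: a float learning_rate gives a fixed learning rate
optim_ms = nn.SGD(net_ms.trainable_params(), learning_rate=0.01)
# PyTorch: a float lr gives a fixed learning rate
optim_pt = optim.SGD(net_pt.parameters(), lr=0.01)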
Dynamic learning rate:
import mindspore as ms
from mindspore import nn
# dynamic_lr
milestone = [2, 5, 10]
learning_rates = [0.1, 0.05, 0.01]
lr_dynamic = nn.dynamic_lr.piecewise_constant_lr(milestone, learning_rates)
print(lr_dynamic)
# learning_rate_schedule
lr_schedule = nn.learning_rate_schedule.PolynomialDecayLR(learning_rate=0.1,
                                                          end_learning_rate=0.01,
                                                          decay_steps=4,
                                                          power=0.5)
global_step = ms.Tensor(2, ms.int32)
result = lr_schedule(global_step)
print(result)
# lr as input of optimizer
optimizer1 = nn.Momentum(net.trainable_params(), learning_rate=lr_dynamic, momentum=0.9, weight_decay=0.9)
optimizer2 = nn.Momentum(net.trainable_params(), learning_rate=lr_schedule, momentum=0.9, weight_decay=0.9)
[0.1, 0.1, 0.05, 0.05, 0.05, 0.01, 0.01, 0.01, 0.01, 0.01]
0.0736396
from torch import optim
import torch
import numpy as np
optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.9)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
loss_fn = torch.nn.MSELoss()
dataset = [(torch.tensor(np.random.rand(1, 3, 64, 32).astype(np.float32)),
            torch.tensor(np.random.rand(1, 64, 62, 30).astype(np.float32)))]
for epoch in range(5):
    for input, target in dataset:
        optimizer.zero_grad()
        output = net(input)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()
    scheduler.step()
    print(scheduler.get_last_lr())
[0.09000000000000001]
[0.08100000000000002]
[0.07290000000000002]
[0.06561000000000002]
[0.05904900000000002]
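In MindSpore, by contrast, no explicit scheduler step is needed: when a LearningRateSchedule instance is passed as learning_rate, the optimizer evaluates it against its own internal step counter on every update. The following is a minimal sketch (assuming the MindSpore Net and the lr_schedule defined above; the label shape reflects that mindspore.nn.Conv2d pads with pad_mode='same' by default):

import numpy as np
import mindspore as ms
from mindspore import nn

net = Net()  # the MindSpore Net (nn.Cell) defined above
net.set_train()
optimizer = nn.Momentum(net.trainable_params(), learning_rate=lr_schedule, momentum=0.9)
loss_fn = nn.MSELoss()
data = ms.Tensor(np.random.rand(1, 3, 64, 32).astype(np.float32))
label = ms.Tensor(np.random.rand(1, 64, 64, 32).astype(np.float32))
# TrainOneStepCell applies the optimizer each step; the dynamic lr is updated automatically
train_step = nn.TrainOneStepCell(nn.WithLossCell(net, loss_fn), optimizer)
for epoch in range(5):
    loss = train_step(data, label)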
Same.
Both PyTorch and MindSpore support parameter grouping and are used in similar ways. However, MindSpore only supports grouping for 'params', 'lr', 'weight_decay' and 'grad_centralization', while PyTorch supports grouping for all optimizer inputs.
Currently, a few individual optimizers in MindSpore and PyTorch do not support parameter grouping; for details, refer to the description of each optimizer.
MindSpore:
from mindspore import nn
net = Net()
conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
no_conv_params = list(filter(lambda x: "conv" not in x.name, net.trainable_params()))
fix_lr = 0.01
polynomial_decay_lr = nn.learning_rate_schedule.PolynomialDecayLR(learning_rate=0.1,
                                                                  end_learning_rate=0.01,
                                                                  decay_steps=4,
                                                                  power=0.5)
group_params = [{'params': conv_params, 'weight_decay': 0.01, 'lr': fix_lr},
                {'params': no_conv_params, 'lr': polynomial_decay_lr},
                {'order_params': net.trainable_params()}]
optim_sgd = nn.SGD(group_params, learning_rate=0.1)
PyTorch:
from torch import optim
net = Net()
all_params = net.parameters()
conv_params = []
no_conv_params = []
for pname, p in net.named_parameters():
    if 'conv' in pname:
        conv_params += [p]
    else:
        no_conv_params += [p]
group_params = [{'params': conv_params, 'weight_decay': 0.01, 'lr': 0.01},
                {'params': no_conv_params, 'nesterov': True}]
optim_sgd = optim.SGD(group_params, lr=0.01)