# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Differential privacy model.
"""
from easydict import EasyDict as edict
from mindspore.train.model import Model
from mindspore.train import amp
from mindspore.train.amp import _config_level
from mindspore.common import dtype as mstype
from mindspore.nn.wrap.cell_wrapper import _VirtualDatasetCell
from mindspore.parallel._utils import _get_parallel_mode
from mindspore.train.model import ParallelMode
from mindspore.train.amp import _do_keep_batchnorm_fp32
from mindspore.train.amp import _add_loss_network
from mindspore import context
from mindspore import nn
from mindspore import Tensor
from mindspore.ops import composite as C
from mindspore.ops import operations as P
from mindspore.ops import functional as F
from mindspore.ops.operations import NPUGetFloatStatus
from mindspore.ops.operations import NPUAllocFloatStatus
from mindspore.ops.operations import NPUClearFloatStatus
from mindspore.ops.operations import ReduceSum
from mindspore.ops.operations import LessEqual
from mindspore.parallel._utils import _get_gradients_mean
from mindspore.parallel._utils import _get_device_num
from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
from mindspore.common.parameter import Parameter
from mindspore.nn.wrap.loss_scale import _grad_overflow
from mindspore.nn import Cell
from mindspore import ParameterTuple
from mindarmour.utils.logger import LogUtil
from mindarmour.utils import _check_param as validator
from mindarmour.utils._check_param import check_value_positive, check_param_type
from mindarmour.utils._check_param import check_int_positive, check, check_value_type
from ..mechanisms.mechanisms import _MechanismsParamsUpdater
LOGGER = LogUtil.get_instance()
TAG = 'DP model'
_grad_scale = C.MultitypeFuncGraph("grad_scale")
_reciprocal = P.Reciprocal()


@_grad_scale.register("Tensor", "Tensor")
def tensor_grad_scale(scale, grad):
""" grad scaling """
return grad * F.cast(_reciprocal(scale), F.dtype(grad))


class DPModel(Model):
"""
DPModel is used for constructing a model for differential privacy training.
    This class overloads mindspore.train.Model.
For details, please check `Protecting User Privacy with Differential Privacy Mechanism
<https://mindspore.cn/mindarmour/docs/en/master/protect_user_privacy_with_differential_privacy.html#%E5%B7%AE%E5%88%86%E9%9A%90%E7%A7%81>`_.
Args:
micro_batches (int): The number of small batches split from an original
batch. Default: ``2``.
norm_bound (float): The norm bound that is used to clip the gradient of
each sample. Default: ``1.0``.
        noise_mech (Mechanisms): The object that can generate different types of
            noise. Default: ``None``.
        clip_mech (Mechanisms): The object used to update the adaptive clipping
            bound. Default: ``None``.
optimizer (Cell): Optimizer used for differential privacy training, which can be original mindspore
optimizers (for example, Momentum optimizer) or optimizers generated by DPOptimizerClassFactory.
Default: ``nn.Momentum``.
Raises:
ValueError: If `optimizer` is ``None``.
ValueError: If `optimizer` is not DPOptimizer and `noise_mech` is ``None``.
ValueError: If `optimizer` is DPOptimizer and `noise_mech` is not ``None``.
ValueError: If `noise_mech` or DPOptimizer's mech method is adaptive while `clip_mech` is not ``None``.
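
    Examples:
        A minimal usage sketch. ``LeNet5``, ``ds_train`` and the hyper-parameter
        values below are illustrative assumptions and are not defined in this
        file; the mechanism factory call follows the MindArmour
        differential-privacy tutorial.

        >>> from mindspore import nn
        >>> from mindarmour.privacy.diff_privacy import NoiseMechanismsFactory
        >>> network = LeNet5()  # any user-defined forward network (assumed)
        >>> loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
        >>> opt = nn.Momentum(network.trainable_params(), learning_rate=0.1, momentum=0.9)
        >>> noise_mech = NoiseMechanismsFactory().create('Gaussian',
        ...                                              norm_bound=1.0,
        ...                                              initial_noise_multiplier=0.5)
        >>> model = DPModel(micro_batches=2,
        ...                 norm_bound=1.0,
        ...                 noise_mech=noise_mech,
        ...                 clip_mech=None,
        ...                 network=network,
        ...                 loss_fn=loss,
        ...                 optimizer=opt,
        ...                 metrics={'accuracy'})
        >>> model.train(10, ds_train)  # ds_train: a batched MindSpore dataset (assumed)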
"""
def __init__(self, micro_batches=2, norm_bound=1.0, noise_mech=None,
clip_mech=None, optimizer=nn.Momentum, **kwargs):
if micro_batches:
self._micro_batches = check_int_positive('micro_batches',
micro_batches)
else:
self._micro_batches = None
norm_bound = check_param_type('norm_bound', norm_bound, float)
norm_bound = check_value_positive('norm_bound', norm_bound)
norm_bound = Tensor(norm_bound, mstype.float32)
self._norm_bound = Parameter(norm_bound, 'norm_bound')
if optimizer is None:
            msg = 'Optimizer needs to be set, but got None.'
LOGGER.error(TAG, msg)
raise ValueError(msg)
opt_name = optimizer.__class__.__name__
        # Check whether noise_mech and DPOptimizer are both set or both unset; if so, raise ValueError.
        # Also check whether noise_mech or DPOptimizer's mech method is adaptive while clip_mech is not None;
        # if so, raise ValueError as well.
if noise_mech is not None and "DPOptimizer" in opt_name:
msg = 'DPOptimizer is not supported while noise_mech is not None'
LOGGER.error(TAG, msg)
raise ValueError(msg)
if noise_mech is None:
if "DPOptimizer" in opt_name:
if 'Ada' in optimizer._mech.__class__.__name__ and clip_mech is not None:
msg = "When DPOptimizer's mech method is adaptive, clip_mech must be None."
LOGGER.error(TAG, msg)
raise ValueError(msg)
else:
                msg = 'DPModel requires either noise_mech or a DPOptimizer to be configured, ' \
                      'please refer to the example.'
LOGGER.error(TAG, msg)
raise ValueError(msg)
self._noise_mech = noise_mech
if noise_mech is not None:
if 'Ada' in noise_mech.__class__.__name__ and clip_mech is not None:
msg = 'When noise_mech is Adaptive, clip_mech must be None.'
LOGGER.error(TAG, msg)
raise ValueError(msg)
if clip_mech is None or isinstance(clip_mech, Cell):
self._clip_mech = clip_mech
super(DPModel, self).__init__(optimizer=optimizer, **kwargs)
        # Check device_target; only GPU and Ascend are supported for now.
device = context.get_context("device_target")
if device not in ["GPU", "Ascend"]:
msg = "'device_target' for DP training should be 'GPU' or 'Ascend', but got {}.".format(device)
LOGGER.error(TAG, msg)
raise ValueError(msg)

    def _amp_build_train_network(self, network, optimizer, loss_fn=None,
level='O0', **kwargs):
"""
Build the mixed precision training cell automatically.
Args:
network (Cell): Definition of the network.
loss_fn (Union[None, Cell]): Definition of the loss_fn. If None,
the `network` should have the loss inside. Default: ``None``.
optimizer (Optimizer): Optimizer to update the Parameter.
level (str): Supports [O0, O2]. Default: "O0".
- O0: Do not change.
- O2: Cast network to float16, keep batchnorm and `loss_fn`
(if set) run in float32, using dynamic loss scale.
cast_model_type (:class:`mindspore.dtype`): Supports `mstype.float16`
or `mstype.float32`. If set to `mstype.float16`, use `float16`
mode to train. If set, overwrite the level setting.
            keep_batchnorm_fp32 (bool): Keep BatchNorm running in `float32`. If set,
                overwrite the level setting.
            loss_scale_manager (Union[None, LossScaleManager]): If ``None``, the loss
                is not scaled; otherwise the loss is scaled by LossScaleManager.
                If set, overwrite the level setting.
"""
check_value_type('network', network, nn.Cell, None)
check_value_type('optimizer', optimizer, nn.Optimizer, None)
check('level', level, "", ['O0', 'O2'], validator.IN, None)
self._check_kwargs(kwargs)
config = dict(_config_level[level], **kwargs)
config = edict(config)
if config.cast_model_type == mstype.float16:
network.to_float(mstype.float16)
if config.keep_batchnorm_fp32:
_do_keep_batchnorm_fp32(network)
if loss_fn:
network = _add_loss_network(network, loss_fn,
config.cast_model_type)
if _get_parallel_mode() in (
ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL):
network = _VirtualDatasetCell(network)
loss_scale = 1.0
if config.loss_scale_manager is not None:
loss_scale_manager = config.loss_scale_manager
loss_scale = loss_scale_manager.get_loss_scale()
update_cell = loss_scale_manager.get_update_cell()
if update_cell is not None:
                # CPU does not support `TrainOneStepWithLossScaleCell` because of control flow.
if not context.get_context("enable_ge") and context.get_context(
"device_target") == "CPU":
msg = "Only `loss_scale_manager=None` and " \
"`loss_scale_manager=FixedLossScaleManager(drop_overflow" \
"_update=False)` are supported in current version. " \
"If you use `O2` option, please use " \
"`loss_scale_manager=None` or `FixedLossScaleManager`"
LOGGER.error(TAG, msg)
raise ValueError(msg)
network = _TrainOneStepWithLossScaleCell(network,
optimizer,
scale_update_cell=update_cell,
micro_batches=self._micro_batches,
norm_bound=self._norm_bound,
clip_mech=self._clip_mech,
noise_mech=self._noise_mech).set_train()
return network
network = _TrainOneStepCell(network,
optimizer,
self._norm_bound,
loss_scale,
micro_batches=self._micro_batches,
clip_mech=self._clip_mech,
noise_mech=self._noise_mech).set_train()
return network

    def _build_train_network(self):
"""Build train network"""
network = self._network
if self._micro_batches:
if self._optimizer:
if self._loss_scale_manager_set:
network = self._amp_build_train_network(network,
self._optimizer,
self._loss_fn,
level=self._amp_level,
loss_scale_manager=self._loss_scale_manager,
keep_batchnorm_fp32=self._keep_bn_fp32)
else:
network = self._amp_build_train_network(network,
self._optimizer,
self._loss_fn,
level=self._amp_level,
keep_batchnorm_fp32=self._keep_bn_fp32)
elif self._loss_fn:
network = nn.WithLossCell(network, self._loss_fn)
else:
if self._optimizer:
if self._loss_scale_manager_set:
network = amp.build_train_network(network,
self._optimizer,
self._loss_fn,
level=self._amp_level,
loss_scale_manager=self._loss_scale_manager,
keep_batchnorm_fp32=self._keep_bn_fp32)
else:
network = amp.build_train_network(network,
self._optimizer,
self._loss_fn,
level=self._amp_level,
keep_batchnorm_fp32=self._keep_bn_fp32)
elif self._loss_fn:
network = nn.WithLossCell(network, self._loss_fn)
if self._parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL,
ParallelMode.AUTO_PARALLEL):
network.set_auto_parallel()
return network


class _ClipGradients(nn.Cell):
"""
Clip gradients.
Inputs:
grads (tuple[Tensor]): Gradients to clip.
clip_norm (float): The l2-norm bound used to clip the gradients.
cur_norm (float): The l2-norm of grads. If None, the norm will be
calculated in this function. Default: ``None``.
Outputs:
tuple[Tensor], clipped gradients.
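
    Note:
        This implements standard global-norm clipping: each gradient is scaled by
        min(1, clip_norm / cur_norm), so gradients whose l2-norm is already within
        the bound are returned unchanged.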
"""
def __init__(self):
super(_ClipGradients, self).__init__()
self._add = P.Add()
self._reduce_sum = P.ReduceSum()
self._square_all = P.Square()
self._sqrt = P.Sqrt()

    def construct(self, grads, clip_norm, cur_norm=None):
"""
construct a compute flow.
"""
if cur_norm is None:
# calculate current l2-norm of grads
square_sum = Tensor(0, mstype.float32)
for grad in grads:
square_sum = self._add(square_sum, self._reduce_sum(self._square_all(grad)))
cur_norm = self._sqrt(square_sum)
if cur_norm <= clip_norm:
return grads
new_grads = ()
for grad in grads:
clipped_grad = grad * (clip_norm / cur_norm)
new_grads = new_grads + (clipped_grad,)
return new_grads


class _TupleAdd(nn.Cell):
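    """Element-wise addition of two tuples of tensors (e.g. two gradient tuples)."""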
def __init__(self):
super(_TupleAdd, self).__init__()
self.add = P.Add()
self.hyper_map = C.HyperMap()

    def construct(self, input1, input2):
"""Add two tuple of data."""
out = self.hyper_map(self.add, input1, input2)
return out


class _TrainOneStepWithLossScaleCell(Cell):
r"""
Network training with loss scaling.
    This is a training step with loss scaling. It takes a network, an optimizer
    and possibly a scale update Cell as args. The loss scale value can be
    updated on either the host side or the device side. The cell will be
    compiled into a graph that takes `data`, `label` and `sens` as input
    data. The `sens` acts as the loss scaling value. If you want to update it
    on the host side, the value should be provided. If `sens` is not given, the
    loss scale update logic should be provided by `scale_update_cell`. If
    `scale_update_cell` is not None and `sens` is provided,
    `scale_update_cell` will be ignored.
Args:
network (Cell): The training network.
optimizer (Cell): Optimizer for updating the weights.
scale_update_cell(Cell): The loss scaling update logic cell.
Default: ``None``.
micro_batches (int): The number of small batches split from an original
batch. Default: ``None``.
        norm_bound (Tensor): The norm bound that is used to clip the gradient of
            each sample. Default: ``1.0``.
        noise_mech (Mechanisms): The object that can generate different types of
            noise. Default: ``None``.
        clip_mech (Mechanisms): The object used to update the adaptive clipping
            bound. Default: ``None``.
Inputs:
- **inputs** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
- **label** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
- **scaling_sens** (Tensor) - Tensor of shape :math:`()`.
Outputs:
Tuple of 3 Tensor, the loss, overflow flag and current loss scaling value.
- **loss** (Tensor) - Tensor with shape :math:`()`.
- **overflow** (Tensor) - Tensor with shape :math:`()`, type is bool.
- **loss_scale** (Tensor) - Tensor with shape :math:`()`.
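
    Note:
        A sketch of the per-step update implemented below, with :math:`M` micro-batches
        (`micro_batches`), clipping bound :math:`C` (`norm_bound`), per-micro-batch
        gradients :math:`g_i` and noise :math:`n` drawn from `noise_mech`:

        .. math::
            g = \frac{1}{M}\left(\sum_{i=1}^{M} g_i \cdot
            \min\left(1, \frac{C}{\lVert g_i \rVert_2}\right) + n\right)

        The averaged gradient is then divided by the loss scale, reduced across
        devices if needed, checked for overflow, and applied by the optimizer.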
"""
def __init__(self, network, optimizer, scale_update_cell=None,
micro_batches=None, norm_bound=1.0, noise_mech=None,
clip_mech=None):
super(_TrainOneStepWithLossScaleCell, self).__init__(auto_prefix=False)
self.network = network
self.network.set_grad()
self.network.add_flags(defer_inline=True)
self.weights = ParameterTuple(network.trainable_params())
self.optimizer = optimizer
self.grad = C.GradOperation(get_by_list=True, sens_param=True)
self.hyper_map = C.HyperMap()
if context.get_context("device_target") == "GPU":
self.gpu_target = True
self.float_status = P.FloatStatus()
self.addn = P.AddN()
self.reshape = P.Reshape()
else:
self.gpu_target = False
self.alloc_status = NPUAllocFloatStatus()
self.get_status = NPUGetFloatStatus()
self.clear_status = NPUClearFloatStatus()
self.reduce_sum = ReduceSum(keep_dims=False)
self.base = Tensor(1, mstype.float32)
self.less_equal = LessEqual()
self.allreduce = P.AllReduce()
self.parallel_mode = _get_parallel_mode()
self.grad_reducer = F.identity
self.reducer_flag = self.parallel_mode in [ParallelMode.DATA_PARALLEL,
ParallelMode.HYBRID_PARALLEL]
if self.reducer_flag:
mean = _get_gradients_mean()
degree = _get_device_num()
self.grad_reducer = DistributedGradReducer(optimizer.parameters,
mean, degree)
self.is_distributed = self.parallel_mode != ParallelMode.STAND_ALONE
self.loss_scale = None
self.loss_scaling_manager = scale_update_cell
if scale_update_cell:
self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
name="loss_scale")
self.add_flags(has_effect=True)
# dp params
self._micro_batches = micro_batches
self._norm_bound = norm_bound
self._split = P.Split(0, self._micro_batches)
self._clip_by_global_norm = _ClipGradients()
self._noise_mech = noise_mech
self._clip_mech = clip_mech
self._add = P.Add()
self._norm = nn.Norm()
self._tuple_add = _TupleAdd()
self._hyper_map = C.HyperMap()
self._micro_float = Tensor(micro_batches, mstype.float32)
self._zero = Tensor(0, mstype.float32)
self._assign = P.Assign()
self._div = P.Div()
self._sqrt = P.Sqrt()
self._reduce_sum = P.ReduceSum()
self._square_all = P.Square()
self._less = P.Less()
self._cast = P.Cast()
self._noise_mech_param_updater = None
if self._noise_mech is not None and self._noise_mech._decay_policy is not None:
self._noise_mech_param_updater = _MechanismsParamsUpdater(
decay_policy=self._noise_mech._decay_policy,
decay_rate=self._noise_mech._noise_decay_rate,
cur_noise_multiplier=self._noise_mech._noise_multiplier,
init_noise_multiplier=self._noise_mech._initial_noise_multiplier)

    def construct(self, data, label, sens=None):
"""
construct a compute flow.
"""
init = False
if not self.gpu_target:
# init overflow buffer
init = self.alloc_status()
# clear overflow buffer
self.clear_status(init)
if sens is None:
scaling_sens = self.loss_scale
else:
scaling_sens = sens
# DP clip
weights = self.weights
record_datas = self._split(data)
record_labels = self._split(label)
        # the first micro-batch
loss = self.network(record_datas[0], record_labels[0])
scaling_sens_filled = C.ones_like(loss) * F.cast(scaling_sens,
F.dtype(loss))
record_grad = self.grad(self.network, weights)(record_datas[0],
record_labels[0],
scaling_sens_filled)
beta = self._zero
square_sum = self._zero
for grad in record_grad:
square_sum = self._add(square_sum,
self._reduce_sum(self._square_all(grad)))
norm_grad = self._sqrt(square_sum)
beta = self._add(beta,
self._cast(self._less(norm_grad, self._norm_bound),
mstype.float32))
record_grad = self._clip_by_global_norm(record_grad,
self._norm_bound, norm_grad)
grads = record_grad
total_loss = loss
for i in range(1, self._micro_batches):
loss = self.network(record_datas[i], record_labels[i])
scaling_sens_filled = C.ones_like(loss) * F.cast(scaling_sens,
F.dtype(loss))
record_grad = self.grad(self.network, weights)(record_datas[i],
record_labels[i],
scaling_sens_filled)
square_sum = self._zero
for grad in record_grad:
square_sum = self._add(square_sum,
self._reduce_sum(self._square_all(grad)))
norm_grad = self._sqrt(square_sum)
beta = self._add(beta,
self._cast(self._less(norm_grad, self._norm_bound),
mstype.float32))
record_grad = self._clip_by_global_norm(record_grad,
self._norm_bound, norm_grad)
grads = self._tuple_add(grads, record_grad)
total_loss = P.Add()(total_loss, loss)
loss = P.Div()(total_loss, self._micro_float)
beta = self._div(beta, self._micro_batches)
if self._noise_mech is not None:
grad_noise_tuple = ()
for grad_item in grads:
grad_noise = self._noise_mech(grad_item)
grad_noise_tuple = grad_noise_tuple + (grad_noise,)
grads = self._tuple_add(grads, grad_noise_tuple)
grads = self._hyper_map(F.partial(_grad_scale, self._micro_float),
grads)
# update mech parameters
if self._noise_mech_param_updater is not None:
multiplier = self._noise_mech_param_updater()
loss = F.depend(loss, multiplier)
grads = self.hyper_map(F.partial(_grad_scale, scaling_sens), grads)
# apply grad reducer on grads
grads = self.grad_reducer(grads)
# get the overflow buffer
if not self.gpu_target:
self.get_status(init)
            # sum overflow buffer elements; 0: no overflow, >0: overflow
flag_sum = self.reduce_sum(init, (0,))
else:
flag_sum = self.hyper_map(F.partial(_grad_overflow), grads)
flag_sum = self.addn(flag_sum)
# convert flag_sum to scalar
flag_sum = self.reshape(flag_sum, (()))
if self.is_distributed:
# sum overflow flag over devices
flag_reduce = self.allreduce(flag_sum)
cond = self.less_equal(self.base, flag_reduce)
else:
cond = self.less_equal(self.base, flag_sum)
overflow = cond
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, cond)
# if there is no overflow, do optimize
if overflow:
opt = False
else:
opt = self.optimizer(grads)
ret = (loss, cond, scaling_sens)
if self._clip_mech is not None:
next_norm_bound = self._clip_mech(beta, self._norm_bound)
            self._assign(self._norm_bound, next_norm_bound)
return F.depend(ret, opt)


class _TrainOneStepCell(Cell):
r"""
Network training package class.
    Wraps the network with an optimizer. The resulting Cell is trained with
input data and label. Backward graph will be created in the construct
function to do parameter updating. Different parallel modes are available
to run the training.
Args:
network (Cell): The training network.
optimizer (Cell): Optimizer for updating the weights.
sens (Number): The scaling number to be filled as the input of back
propagation. Default value is 1.0.
micro_batches (int): The number of small batches split from an original
batch. Default: ``None``.
        norm_bound (Tensor): The norm bound that is used to clip the gradient of
            each sample. Default: ``1.0``.
        noise_mech (Mechanisms): The object that can generate different types
            of noise. Default: ``None``.
clip_mech (Mechanisms): The object is used to update the adaptive clip.
Default: ``None``.
Inputs:
- **data** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
- **label** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
Outputs:
Tensor, a scalar Tensor with shape :math:`()`.
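
    Note:
        When `clip_mech` is set, the clipping bound is adapted every step: `beta`
        (computed in `construct` below) is the fraction of micro-batches whose
        gradient norm falls below `norm_bound`, and `clip_mech(beta, norm_bound)`
        returns the next bound, which is written back with `Assign`.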
"""
def __init__(self, network, optimizer, norm_bound=1.0, sens=1.0,
micro_batches=None,
noise_mech=None, clip_mech=None):
super(_TrainOneStepCell, self).__init__(auto_prefix=False)
self.network = network
self.network.set_grad()
self.network.add_flags(defer_inline=True)
self.weights = optimizer.parameters
self.optimizer = optimizer
self.grad = C.GradOperation(get_by_list=True, sens_param=True)
self.sens = sens
self.reducer_flag = False
self.grad_reducer = None
parallel_mode = _get_parallel_mode()
if parallel_mode in (
ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL):
self.reducer_flag = True
if self.reducer_flag:
mean = _get_gradients_mean()
degree = _get_device_num()
self.grad_reducer = DistributedGradReducer(optimizer.parameters,
mean, degree)
# dp params
if micro_batches is None:
            msg = 'micro_batches must be given in differential privacy training, but got value: {}'.format(
micro_batches)
LOGGER.error(TAG, msg)
raise ValueError(msg)
self._micro_batches = micro_batches
self._norm_bound = norm_bound
self._split = P.Split(0, self._micro_batches)
self._clip_by_global_norm = _ClipGradients()
self._noise_mech = noise_mech
self._clip_mech = clip_mech
self._tuple_add = _TupleAdd()
self._add = P.Add()
self._norm = nn.Norm()
self._hyper_map = C.HyperMap()
self._zero = Tensor(0, mstype.float32)
self._assign = P.Assign()
self._div = P.Div()
self._sqrt = P.Sqrt()
self._reduce_sum = P.ReduceSum()
self._square_all = P.Square()
self._less = P.Less()
self._cast = P.Cast()
self._micro_float = Tensor(micro_batches, mstype.float32)
self._noise_mech_param_updater = None
if self._noise_mech is not None and self._noise_mech._decay_policy is not None:
self._noise_mech_param_updater = _MechanismsParamsUpdater(
decay_policy=self._noise_mech._decay_policy,
decay_rate=self._noise_mech._noise_decay_rate,
cur_noise_multiplier=self._noise_mech._noise_multiplier,
init_noise_multiplier=self._noise_mech._initial_noise_multiplier)

    def construct(self, data, label):
"""
construct a compute flow.
"""
weights = self.weights
record_datas = self._split(data)
record_labels = self._split(label)
loss = self.network(record_datas[0], record_labels[0])
sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
record_grad = self.grad(self.network, weights)(record_datas[0],
record_labels[0], sens)
        # calculate norm_grad
square_sum = self._zero
for grad in record_grad:
square_sum = self._add(square_sum, self._reduce_sum(self._square_all(grad)))
norm_grad = self._sqrt(square_sum)
        # calculate beta
beta = self._zero
if self._clip_mech is not None:
beta = self._add(beta,
self._cast(self._less(norm_grad, self._norm_bound),
mstype.float32))
record_grad = self._clip_by_global_norm(record_grad,
self._norm_bound, norm_grad)
grads = record_grad
total_loss = loss
for i in range(1, self._micro_batches):
loss = self.network(record_datas[i], record_labels[i])
sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
record_grad = self.grad(self.network, weights)(record_datas[i],
record_labels[i],
sens)
            # calculate norm_grad
square_sum = self._zero
for grad in record_grad:
square_sum = self._add(square_sum,
self._reduce_sum(self._square_all(grad)))
norm_grad = self._sqrt(square_sum)
            # calculate beta
if self._clip_mech is not None:
beta = self._add(beta,
self._cast(self._less(norm_grad, self._norm_bound),
mstype.float32))
record_grad = self._clip_by_global_norm(record_grad,
self._norm_bound, norm_grad)
grads = self._tuple_add(grads, record_grad)
total_loss = P.Add()(total_loss, loss)
loss = self._div(total_loss, self._micro_float)
if self._noise_mech is not None:
grad_noise_tuple = ()
for grad_item in grads:
grad_noise = self._noise_mech(grad_item)
grad_noise_tuple = grad_noise_tuple + (grad_noise,)
grads = self._tuple_add(grads, grad_noise_tuple)
grads = self._hyper_map(F.partial(_grad_scale, self._micro_float),
grads)
# update mech parameters
if self._noise_mech_param_updater is not None:
multiplier = self._noise_mech_param_updater()
loss = F.depend(loss, multiplier)
if self.reducer_flag:
# apply grad reducer on grads
grads = self.grad_reducer(grads)
if self._clip_mech is not None:
beta = self._div(beta, self._micro_batches)
next_norm_bound = self._clip_mech(beta, self._norm_bound)
self._norm_bound = self._assign(self._norm_bound, next_norm_bound)
loss = F.depend(loss, self._norm_bound)
return F.depend(loss, self.optimizer(grads))