Custom optimizers in MindSpore

Motivation

Some models rely on non-standard optimizers, so we need to implement our own.

Both MindSpore and PyTorch let users define custom optimizers using basic Python syntax and the framework's operators. In PyTorch, you subclass torch.optim.Optimizer and override the __init__ and step methods to implement whatever update rule you need; see the official PyTorch tutorial for details.
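As a rough illustration of the two-method pattern, a minimal PyTorch sketch might look like this (plain SGD for brevity; this is not the tutorial's exact code):

import torch

class PlainSGD(torch.optim.Optimizer):
    """Minimal custom optimizer: param <- param - lr * grad."""
    def __init__(self, params, lr=1e-3):
        super().__init__(params, defaults=dict(lr=lr))

    @torch.no_grad()
    def step(self, closure=None):
        for group in self.param_groups:
            for p in group['params']:
                if p.grad is not None:
                    p.add_(p.grad, alpha=-group['lr'])  # p <- p - lr * grad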

MindSpore supports a similar pattern, with construct playing the role of step. Taking Momentum as an example, built from basic small operators (the original snippet omitted the Tensor/mstype imports, never updated the moment buffer, and used an incorrect update formula; both are fixed here):

import mindspore.common.dtype as mstype
from mindspore import Parameter, Tensor, ops, nn

class MomentumOpt(nn.Optimizer):
    def __init__(self, params, learning_rate, momentum, weight_decay=0.0, loss_scale=1.0, use_nesterov=False):
        super(MomentumOpt, self).__init__(learning_rate, params, weight_decay, loss_scale)
        self.momentum = Parameter(Tensor(momentum, mstype.float32), name="momentum")
        self.moments = self.parameters.clone(prefix="moments", init="zeros")
        self.assign = ops.Assign()

    def construct(self, gradients):
        lr = self.get_lr()
        params = self.parameters
        moments = self.moments
        success = None
        for param, mom, grad in zip(params, moments, gradients):
            # Expressed with basic small operators:
            # new_mom = momentum * mom + grad;  param <- param - lr * new_mom
            new_mom = self.momentum * mom + grad
            self.assign(mom, new_mom)
            success = self.assign(param, param - lr * new_mom)
        return success
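An optimizer defined this way plugs into training like any built-in one. A minimal usage sketch (the network, loss, and shapes below are placeholders, not from the original post):

net = nn.Dense(16, 1)
loss_fn = nn.MSELoss()
opt = MomentumOpt(net.trainable_params(), learning_rate=0.01, momentum=0.9)
train_net = nn.TrainOneStepCell(nn.WithLossCell(net, loss_fn), opt)
# Each call train_net(data, label) backpropagates and then runs MomentumOpt.construct.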

A gap in the official documentation

The MindSpore documentation never shows how to keep a step counter inside a custom optimizer. Tracking the step with a plain Python int works in dynamic-graph (PyNative) mode, but fails in static-graph mode: ordinary Python attributes are baked into the compiled graph as constants, so an update like self.step += 1 never takes effect across training steps.

Solution

Record the step count with a Parameter instead of a Python int, and write it back with Assign inside construct, as sketched below.
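A minimal, self-contained sketch of the pattern (StepCounter is a toy name, not from the original post):

import mindspore.common.dtype as mstype
from mindspore import Parameter, Tensor, context, nn, ops

class StepCounter(nn.Cell):
    """Toy cell: a step counter that survives static-graph compilation."""
    def __init__(self):
        super().__init__()
        self.step = Parameter(Tensor(0, mstype.int32), name="step")  # persistent state
        self.assign = ops.Assign()

    def construct(self):
        new_step = self.step + 1
        self.assign(self.step, new_step)  # the write-back is a tracked side effect
        return new_step

context.set_context(mode=context.GRAPH_MODE)
counter = StepCounter()
print(counter(), counter())  # 1 2; a plain int attribute would stay frozen at its compile-time value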

My own custom optimizer, using this approach:

import numpy as np
import mindspore.common.dtype as mstype
from mindspore import Parameter, Tensor
from mindspore.nn import Optimizer
from mindspore.nn.optim.optimizer import opt_init_args_register
from mindspore.ops import Add, Assign, Mul, Pow, ReduceSum, Sqrt

class RiemannianAdam(Optimizer):
    """RiemannianAdam optimizer"""

    @opt_init_args_register
    def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-8,
                 weight_decay=0.0):
        """Initialize Adam moments plus a graph-mode-safe step counter."""
        super(RiemannianAdam, self).__init__(learning_rate=learning_rate, parameters=params,
                                             weight_decay=weight_decay)
        self.beta1 = Tensor(np.array([beta1]).astype(np.float32))
        self.beta2 = Tensor(np.array([beta2]).astype(np.float32))
        self.eps = Tensor(np.array([eps]).astype(np.float32))
        self.sum = ReduceSum(keep_dims=True)
        self.sumFalse = ReduceSum(keep_dims=False)
        self.sqrt = Sqrt()
        self.add = Add()
        # First and second moment estimates, one per trainable parameter.
        self.exp_avg = self.parameters.clone(prefix='exp_avg', init='zeros')
        self.exp_avg_sq = self.parameters.clone(prefix='exp_avg_sq', init='zeros')
        # Step counter as a Parameter so it keeps its value in static-graph mode.
        self.step = Parameter(Tensor(0, mstype.int32), name='step')
        self.assign = Assign()
        self.pow = Pow()
        self.mul = Mul()

    def construct(self, gradients):
        """Apply one Adam-style update to every parameter."""
        beta1 = self.beta1
        beta2 = self.beta2
        eps = self.eps
        learning_rate = self.get_lr()
        params = self.parameters
        success = None
        # Advance the step once per optimizer call, not once per parameter
        # (the original draft incremented it twice inside the loop).
        step = self.step + 1
        for exp_avg, exp_avg_sq, param, grad in zip(self.exp_avg, self.exp_avg_sq, params, gradients):
            point = param
            if grad is None:
                continue
            # m <- beta1 * m + (1 - beta1) * g
            exp_avg_update = self.add(self.mul(exp_avg, beta1), (1 - beta1) * grad)
            # v <- beta2 * v + (1 - beta2) * sum(g * g) over the last axis
            exp_avg_sq_update = self.add(self.mul(exp_avg_sq, beta2),
                                         (1 - beta2) * self.sum(grad * grad, -1))
            denom = self.add(self.sqrt(exp_avg_sq_update), eps)
            # Standard Adam bias correction using the shared step counter.
            bias_cor1 = 1 - self.pow(beta1, step)
            bias_cor2 = 1 - self.pow(beta2, step)
            step_size = learning_rate * bias_cor2 ** 0.5 / bias_cor1
            direction = exp_avg_update / denom
            new_point = point - step_size * direction
            self.assign(exp_avg, exp_avg_update)
            self.assign(exp_avg_sq, exp_avg_sq_update)
            success = self.assign(param, new_point)
        # Write the new step back through Assign so the update sticks in graph mode.
        self.assign(self.step, step)
        return success
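A quick way to confirm that the Parameter-based counter really advances in static-graph mode is a small smoke test; the network, data, and shapes below are hypothetical placeholders:

import numpy as np
from mindspore import Tensor, context, nn

context.set_context(mode=context.GRAPH_MODE)
net = nn.Dense(8, 1)
opt = RiemannianAdam(net.trainable_params(), learning_rate=1e-3)
train_step = nn.TrainOneStepCell(nn.WithLossCell(net, nn.MSELoss()), opt)

data = Tensor(np.random.randn(4, 8).astype(np.float32))
label = Tensor(np.random.randn(4, 1).astype(np.float32))
for _ in range(3):
    train_step(data, label)
print(opt.step)  # 3: one increment per optimizer call; a Python int would have stayed at 0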
 

 
