用条件为变量的流程控制语句在MindSpore中的副作用及修正方案
在1.5新特性中,完善了控制流,支持多种控制语句混合编写以及循环自动微分。但在使用条件为变量的流程控制语句时,图编译生成的网络模型中会包含控制流算子,此场景下正向图会执行两次;如果此时正向图中存在Assign等副作用算子,在训练情况下会导致反向图计算结果不符合预期。
代码:
from mindspore import ops
from mindspore import Tensor, nn
from mindspore import dtype as ms
from mindspore import context
# Fixed import paths: Parameter is exported from the top-level mindspore
# package (not mindspore.ops), and composite lives under mindspore.ops
# (not top-level mindspore). The original paths raise ImportError.
from mindspore import Parameter
from mindspore.ops import composite
import numpy as np
class ForwardNet(nn.Cell):
    """Forward network with an Assign side effect inside a variable-condition branch.

    Because ``x < y`` compares two Tensors, the branch condition is a variable,
    so graph compilation emits control-flow operators; per the surrounding note,
    the forward graph then executes twice during training, applying the Assign
    twice. This class deliberately demonstrates that problematic pattern.
    """

    def __init__(self):
        super().__init__()
        # Mutable graph state, starts at 1; updated in construct().
        self.var = Parameter(Tensor(np.array(1), ms.int32))
        self.assign = ops.Assign()

    def construct(self, x, y):
        # Variable (Tensor) condition -> compiled as a control-flow operator.
        if x < y:
            tmp = self.var + 1
            # Side-effect operator inside the branch: this is what runs twice.
            self.assign(self.var, tmp)
        out = (self.var + 1) * x
        out = out + 1
        return out
class BackwardNet(nn.Cell):
    """Wrap a forward network and return the gradient of its output w.r.t. the inputs."""

    def __init__(self, net):
        super().__init__(auto_prefix=False)
        self.forward_net = net
        self.grad = composite.GradOperation()

    def construct(self, *inputs):
        # Build the gradient function of the wrapped net and apply it directly.
        return self.grad(self.forward_net)(*inputs)
# Build the forward net and wrap it for gradient computation.
forward_net = ForwardNet()
backward_net = BackwardNet(forward_net)
x = Tensor(np.array(1), dtype=ms.int32)
y = Tensor(np.array(2), dtype=ms.int32)
# Expected gradient of x is 3, but per the note below the forward graph (and
# its Assign) executes twice under the variable condition, so this prints 4.
output = backward_net(x, y)
print("lixiang assign output:", output)
运行结果:
期望x的梯度为3,但是实际执行得到的梯度为4,原因是正向图执行了两次(tmp = self.var + 1和self.assign(self.var, tmp)被执行了两次)。
如果想得到正确结果,代码需要修改如下:
from mindspore import ops
from mindspore import Tensor, nn
from mindspore import dtype as ms
from mindspore import context
# Fixed import paths: Parameter is exported from the top-level mindspore
# package (not mindspore.ops), and composite lives under mindspore.ops
# (not top-level mindspore). The original paths raise ImportError.
from mindspore import Parameter
from mindspore.ops import composite
import numpy as np
class ForwardNet1(nn.Cell):
    """Corrected variant with the Assign hoisted out of any branch (used when x < y).

    Keeping the side-effect operator at the top level of construct() avoids
    the control-flow operator, so the forward graph runs once and the
    gradient of x is the expected 3.
    """

    def __init__(self):
        super().__init__()
        self.var = Parameter(Tensor(np.array(1), ms.int32))
        self.assign = ops.Assign()

    def construct(self, x, y):
        tmp = self.var + 1
        # Unconditional side effect: executes exactly once per forward pass.
        self.assign(self.var, tmp)
        out = (self.var + 1) * x
        out = out + 1
        return out
class ForwardNet2(nn.Cell):
    """Corrected variant without the Assign (used when x >= y)."""

    def __init__(self):
        super().__init__()
        self.var = Parameter(Tensor(np.array(1), ms.int32))
        # NOTE(review): self.assign is never used in this variant's construct().
        self.assign = ops.Assign()

    def construct(self, x, y):
        # y is unused here; the signature mirrors ForwardNet1 so both variants
        # are interchangeable when wrapped by BackwardNet.
        out = (self.var + 1) * x
        out = out + 1
        return out
class BackwardNet(nn.Cell):
    """Wrap a forward network and return the gradient of its output w.r.t. the inputs."""

    def __init__(self, net):
        super().__init__(auto_prefix=False)
        self.forward_net = net
        self.grad = composite.GradOperation()

    def construct(self, *inputs):
        # Build the gradient function of the wrapped net and apply it directly.
        return self.grad(self.forward_net)(*inputs)
x = Tensor(np.array(1), dtype=ms.int32)
y = Tensor(np.array(2), dtype=ms.int32)
# Decide the branch in Python, outside the compiled graph: the condition is
# evaluated eagerly here, so the selected network contains no control-flow
# operator and its Assign side effect executes only once.
if x < y:
    forward_net = ForwardNet1()
else:
    forward_net = ForwardNet2()
backward_net = BackwardNet(forward_net)
output = backward_net(x, y)
print("lixiang assign output:", output)
运行结果: