Does increasing the learning rate help with vanishing gradients?

No. The parameter update is the learning rate multiplied by the gradient, so when the gradient has decayed to zero, a larger learning rate just multiplies zero by a bigger number: the step is still zero.
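A minimal standalone sketch of that arithmetic (not part of the original script): at a large pre-activation the sigmoid's derivative underflows to exactly 0 in float32, and no learning rate can undo that.

import torch

x = torch.tensor(100000.0, requires_grad=True)
y = torch.sigmoid(x)   # saturates at 1.0
y.backward()           # d(sigmoid)/dx = y * (1 - y) underflows to 0
print(x.grad)          # tensor(0.)

lr = 4e15              # same absurd learning rate as in the experiment below
print(lr * x.grad)     # tensor(0.) -- the step is still zero

The full experiment: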

import torch
from torch import nn
import torch.optim as optim

# One linear neuron followed by a sigmoid.
network = nn.Linear(1, 1)
w = nn.Sigmoid()

# Deliberately huge inputs: the linear output lands deep in the sigmoid's
# saturated tails, where the derivative underflows to exactly 0 in float32.
tr = torch.Tensor([[100000], [200000]])
test = torch.Tensor([[150000], [300000]])  # held-out points, not used below

# An absurdly large learning rate, to show it cannot rescue a zero gradient.
optimizer = optim.Adam(network.parameters(), lr=4000000000000000)

network.train()
for step in range(5):  # a few iterations are enough to see the effect
    optimizer.zero_grad()
    l = w(network(tr))
    # Variant from the original experiment: insert a BatchNorm1d between the
    # linear layer and the sigmoid to keep it out of saturation:
    #   network1 = nn.BatchNorm1d(1)
    #   l = w(network1(network(tr)))
    # Squared error: push the first output toward 0, the second toward 1.
    l = (l[0] - 0) ** 2 + (l[1] - 1) ** 2
    l.backward()
    for name, parms in network.named_parameters():
        print('-->name:', name)
        print('-->para:', parms)
        print('-->requires_grad:', parms.requires_grad)
        print('-->grad_value:', parms.grad)  # 0 for both weight and bias
        print("===")
    optimizer.step()  # lr * 0 is still 0: the parameters never move
The printed gradients are 0 for both the weight and the bias, so the parameters never move, no matter how large the learning rate is.
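The commented-out lines in the script point at the actual remedy: normalize the activations so the sigmoid operates away from its flat tails. A minimal sketch of that variant, assuming the lr=0.04 from the commented-out optimizer; the loss now actually decreases, because gradient flows through BatchNorm's affine parameters.

import torch
from torch import nn
import torch.optim as optim

torch.manual_seed(0)
network = nn.Linear(1, 1)
network1 = nn.BatchNorm1d(1)   # normalizes the linear output before the sigmoid
w = nn.Sigmoid()
tr = torch.Tensor([[100000], [200000]])

params = list(network.parameters()) + list(network1.parameters())
optimizer = optim.Adam(params, lr=0.04)

network.train()
network1.train()
for step in range(500):
    optimizer.zero_grad()
    l = w(network1(network(tr)))   # sigmoid now sees values near ±1, not ±100000
    l = (l[0] - 0) ** 2 + (l[1] - 1) ** 2
    l.backward()
    optimizer.step()
    if step % 100 == 0:
        print(step, l.item())      # the loss falls instead of stalling at a fixed value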