Deep Learning from Scratch (9): Techniques Related to Learning

Implementation of Stochastic Gradient Descent (SGD)

SGD moves each parameter a small step in the direction opposite its gradient: W ← W − η ∂L/∂W, where η is the learning rate.

class SGD:
    def __init__(self, lr=0.01):
        self.lr = lr  # learning rate (eta)

    def update(self, params, grads):
        # params and grads are dicts keyed by parameter name
        for key in params.keys():
            params[key] -= self.lr * grads[key]
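A minimal usage sketch (the params and grads dicts below are made up; in the book they come from a network's params attribute and its gradient method):

import numpy as np

# Hypothetical parameters and gradients; in the book these come from
# network.params and network.gradient(x_batch, t_batch).
params = {'W1': np.array([[1.0, 2.0]]), 'b1': np.array([0.5])}
grads  = {'W1': np.array([[0.1, -0.2]]), 'b1': np.array([0.3])}

optimizer = SGD(lr=0.1)
optimizer.update(params, grads)   # in place: W <- W - lr * grad
print(params['W1'])               # [[0.99 2.02]]
print(params['b1'])               # [0.47]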

import numpy as np
import matplotlib.pyplot as plt

# Plot f(x, y) = x**2 / 20 + y**2: an elongated bowl whose gradient is
# much steeper along y than along x.
x = np.linspace(-10, 10, 100)
y = np.linspace(-10, 10, 100)
x, y = np.meshgrid(x, y)
z = x**2 / 20 + y**2

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(x, y, z)
plt.show()


On a function like this, SGD is inefficient: the gradient mostly points along the y-axis rather than toward the minimum at (0, 0), so the updates zigzag. This motivates the alternative optimizers below.
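A small sketch of that zigzag, reusing the SGD class above with the analytic gradient (∂f/∂x = x/10, ∂f/∂y = 2y); the start point (-7.0, 2.0) and lr=0.95 follow the book's comparison figure, but treat them as assumptions:

def gradient(params):
    # analytic gradient of f(x, y) = x**2 / 20 + y**2
    return {'x': params['x'] / 10.0, 'y': 2.0 * params['y']}

params = {'x': -7.0, 'y': 2.0}
optimizer = SGD(lr=0.95)
for i in range(5):
    optimizer.update(params, gradient(params))
    print(params['x'], params['y'])
# x shrinks slowly (factor 0.905 per step) while y flips sign every
# step (factor -0.9), which is exactly the zigzag path.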

Momentum

Momentum adds a velocity term v: v ← αv − η ∂L/∂W, then W ← W + v, where α (0.9 here) controls how much of the previous velocity is kept.

class Momentum:
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None  # velocity, one entry per parameter

    def update(self, params, grads):
        if self.v is None:
            self.v = {}
            for key, val in params.items():
                self.v[key] = np.zeros_like(val)

        # v <- momentum * v - lr * grad;  param <- param + v
        # (this loop must sit outside the `if` so it runs on every call,
        # and the velocity update subtracts the gradient, not adds it)
        for key in params.keys():
            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
            params[key] += self.v[key]
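Continuing the sketch from the SGD section (same gradient helper and start point; the learning rate is again an assumption), the velocity damps the oscillation along y so the path curves toward the minimum instead of zigzagging:

params = {'x': -7.0, 'y': 2.0}
optimizer = Momentum(lr=0.1, momentum=0.9)
for i in range(30):
    optimizer.update(params, gradient(params))
print(params['x'], params['y'])   # both coordinates head toward 0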

AdaGrad

AdaGrad keeps a per-parameter running sum h of squared gradients and divides each step by √h, so frequently-updated parameters take smaller and smaller steps: h ← h + (∂L/∂W) ⊙ (∂L/∂W), then W ← W − η · (1/√h) · ∂L/∂W.

class AdaGrad:
    def __init__(self, lr=0.01):
        self.lr = lr
        self.h = None  # running sum of squared gradients

    def update(self, params, grads):
        if self.h is None:
            self.h = {}
            for key, val in params.items():
                self.h[key] = np.zeros_like(val)

        for key in params.keys():
            self.h[key] += grads[key] * grads[key]
            # 1e-7 guards against division by zero while h is still 0
            params[key] -= (self.lr * grads[key] /
                            (np.sqrt(self.h[key]) + 1e-7))
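A tiny illustration of the decaying step size: with a constant gradient g, the update at step t is lr * g / sqrt(t * g**2) = lr / sqrt(t), so the effective learning rate shrinks automatically (the values below are made up for illustration):

params = {'w': 0.0}
optimizer = AdaGrad(lr=1.0)
for i in range(4):
    optimizer.update(params, {'w': 10.0})
    print(params['w'])
# w moves by 1.0, 0.707, 0.577, 0.5 -- the lr / sqrt(t) decay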
            

Adam

Adam roughly combines Momentum's velocity with AdaGrad-style per-parameter scaling, and corrects the bias of both moment estimates early in training.

class Adam:
    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
        self.lr = lr
        self.beta1 = beta1  # decay rate for the first moment (mean)
        self.beta2 = beta2  # decay rate for the second moment (variance)
        self.iter = 0
        self.m = None
        self.v = None

    def update(self, params, grads):
        if self.m is None:
            self.m, self.v = {}, {}
            for key, val in params.items():
                self.m[key] = np.zeros_like(val)
                self.v[key] = np.zeros_like(val)

        self.iter += 1
        # fold the bias correction of both moments into the step size
        lr_t = self.lr * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)

        for key in params.keys():
            # equivalent in-place form of the usual EMA updates:
            # m = beta1*m + (1-beta1)*grad,  v = beta2*v + (1-beta2)*grad**2
            self.m[key] += (1 - self.beta1) * (grads[key] - self.m[key])
            self.v[key] += (1 - self.beta2) * (grads[key]**2 - self.v[key])

            params[key] -= lr_t * self.m[key] / (np.sqrt(self.v[key]) + 1e-7)
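All four optimizers on the same bowl-shaped function, in the spirit of the book's optimizer_compare_naive.py (the learning rates are the ones that script uses, but treat the exact values as assumptions; gradient is the helper defined in the SGD section):

optimizers = {
    'SGD':      SGD(lr=0.95),
    'Momentum': Momentum(lr=0.1),
    'AdaGrad':  AdaGrad(lr=1.5),
    'Adam':     Adam(lr=0.3),
}
for name, opt in optimizers.items():
    params = {'x': -7.0, 'y': 2.0}
    for i in range(30):
        opt.update(params, gradient(params))
    print(name, float(params['x']), float(params['y']))
# all four approach the minimum (0, 0); SGD takes the most
# oscillatory path, while AdaGrad and Adam damp y early on.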

Dropout Implementation

Dropout randomly zeroes units during training and scales activations at test time, acting as a regularizer.

class Dropout:
    def __init__(self, dropout_ratio=0.5):
        self.dropout_ratio = dropout_ratio
        self.mask = None

    def forward(self, x, train_flg=True):
        if train_flg:
            # zero out each unit with probability dropout_ratio
            self.mask = np.random.rand(*x.shape) > self.dropout_ratio
            return x * self.mask
        else:
            # at test time, scale activations by the keep probability
            return x * (1.0 - self.dropout_ratio)

    def backward(self, dout):
        # gradients flow only through the units kept in forward
        return dout * self.mask
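A quick check of the two modes (the input array is made up; the seed is only there to make the run repeatable):

np.random.seed(0)
x = np.ones((2, 4))
drop = Dropout(dropout_ratio=0.5)
print(drop.forward(x, train_flg=True))    # about half the entries zeroed
print(drop.forward(x, train_flg=False))   # every entry scaled by 0.5

An alternative not used here is "inverted dropout", which divides by (1 - dropout_ratio) during training so that inference needs no scaling.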
    

References

《深度学习入门:基于Python的理论与实现》 (Deep Learning from Scratch)
