Finding the Minimum of a Convex Function with Gradient Descent
1. Procedure
- Compute the derivative (gradient) of the function at the current point
- Update x: x_new = x - learning_rate * gradient
- Check convergence: stop when |gradient| < tolerance, otherwise repeat from the first step (a minimal sketch follows this list)
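A minimal sketch of these three steps, using the same example f(x) = (x+3)² + 1 as the full program below; the learning rate and tolerance here are illustrative choices, not required values:

```python
# Minimal gradient descent loop for f(x) = (x+3)^2 + 1.
x = 0.0                 # initial point
learning_rate = 0.1     # illustrative step size
tolerance = 1e-6        # illustrative stopping threshold

for _ in range(100):                   # iteration budget
    gradient = 2 * (x + 3)             # step 1: derivative f'(x) = 2(x+3)
    x = x - learning_rate * gradient   # step 2: move against the gradient
    if abs(gradient) < tolerance:      # step 3: convergence check
        break

print(x)  # approaches the true minimizer x = -3
```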
2. Why does moving along the negative gradient direction reach the minimum?
This follows from a first-order Taylor expansion; a sketch of the argument is given below.
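A sketch of the standard first-order argument, assuming f is differentiable and the step size η > 0 is small:

```latex
% First-order Taylor expansion of f around the current point x:
f(x + \Delta x) \approx f(x) + f'(x)\,\Delta x
% Step against the gradient, \Delta x = -\eta f'(x) with small \eta > 0:
f\bigl(x - \eta f'(x)\bigr) \approx f(x) - \eta \bigl(f'(x)\bigr)^{2} \le f(x)
```

So the function value strictly decreases whenever f'(x) ≠ 0, i.e. the negative gradient is a descent direction; for a convex function, the only point where no further descent is possible (f'(x) = 0) is the global minimum.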
```python
import numpy as np
import matplotlib.pyplot as plt


def f(x):
    """Objective function: f(x) = (x+3)^2 + 1"""
    return (x + 3) ** 2 + 1


def grad_f(x):
    """Gradient (derivative): f'(x) = 2(x+3)"""
    return 2 * (x + 3)


def gradient_descent(learning_rate=0.1, max_iters=100, tolerance=1e-6, x0=0):
    """
    Gradient descent.

    Parameters:
        learning_rate: step size
        max_iters: maximum number of iterations
        tolerance: convergence tolerance on |gradient|
        x0: initial point

    Returns:
        x_history: history of x values
        f_history: history of function values
    """
    x = x0
    x_history = [x]
    f_history = [f(x)]

    print(f"Initial value: x0 = {x0:.4f}, f(x0) = {f(x0):.4f}")
    print("-" * 50)

    for i in range(max_iters):
        # Compute the gradient at the current point
        gradient = grad_f(x)

        # Update x along the negative gradient
        x_new = x - learning_rate * gradient

        # Record history
        x_history.append(x_new)
        f_history.append(f(x_new))

        # Print the first 10 iterations, then every 10th
        if i < 10 or i % 10 == 9 or i == max_iters - 1:
            print(f"Iteration {i + 1:3d}: x = {x_new:8.6f}, "
                  f"f(x) = {f(x_new):8.6f}, gradient = {gradient:8.6f}")

        # Check convergence
        if abs(gradient) < tolerance:
            print(f"\nConverged after {i + 1} iterations!")
            break

        x = x_new

    return x_history, f_history


def plot_results(x_history, f_history):
    """Plot the optimization process."""
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

    # Function curve and optimization path
    x_vals = np.linspace(-5, 1, 100)
    y_vals = f(x_vals)
    ax1.plot(x_vals, y_vals, 'b-', label='f(x) = (x+3)² + 1', linewidth=2)
    ax1.plot(x_history, f_history, 'ro-', label='optimization path', markersize=4)
    ax1.set_xlabel('x')
    ax1.set_ylabel('f(x)')
    ax1.set_title('Gradient descent trajectory')
    ax1.legend()
    ax1.grid(True)

    # Convergence of the function value (log scale)
    ax2.semilogy(range(len(f_history)), f_history, 'g-o', markersize=4)
    ax2.set_xlabel('Iteration')
    ax2.set_ylabel('f(x) (log scale)')
    ax2.set_title('Convergence of the function value')
    ax2.grid(True)

    plt.tight_layout()
    plt.show()


# Run gradient descent
print("Gradient descent for the minimum of f(x) = (x+3)² + 1")
print("=" * 50)

# Compare different learning rates
learning_rates = [0.01, 0.1, 0.3]
results = {}
for lr in learning_rates:
    print(f"\nLearning rate α = {lr}:")
    x_hist, f_hist = gradient_descent(learning_rate=lr, x0=0)
    results[lr] = (x_hist, f_hist)
    print(f"Final result: x = {x_hist[-1]:.6f}, f(x) = {f_hist[-1]:.6f}")

# Plot the run with learning rate 0.1
plot_results(results[0.1][0], results[0.1][1])

# Compare convergence across learning rates
print("\n" + "=" * 50)
print("Convergence comparison across learning rates:")
print("=" * 50)
for lr in learning_rates:
    x_hist, f_hist = results[lr]
    iterations = len(x_hist) - 1
    final_x = x_hist[-1]
    final_f = f_hist[-1]
    print(f"α = {lr:.2f}: {iterations:2d} iterations, "
          f"x = {final_x:7.4f}, f(x) = {final_f:7.4f}")

# Theoretical minimum
print("\nTheoretical minimum: x = -3.0000, f(x) = 1.0000")
```
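For this particular quadratic, the learning-rate comparison can be checked by hand: the update x_{k+1} = x_k - η·2(x_k + 3) rewrites as x_{k+1} + 3 = (1 - 2η)(x_k + 3), so the distance to the minimizer x = -3 shrinks by a factor of |1 - 2η| per iteration. For η = 0.01, 0.1, and 0.3 those factors are 0.98, 0.8, and 0.4, which is why the larger learning rates in this range converge in far fewer iterations (and why any η > 1 would diverge).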
Thanks!
