BoydC9pt2

1756350834551


Descent methods

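Descent methods produce a minimizing sequence \(x^{(k)}\), \(k=0,1,\dots\), of the form

\[x^{(k+1)} = x^{(k)}+t^{(k)}\Delta x^{(k)},\qquad f(x^{(k+1)})<f(x^{(k)}) \text{ (unless } x^{(k)} \text{ is optimal)}, \]

where \(\Delta x^{(k)}\) is the search direction and \(t^{(k)}>0\) is the step size.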

Gradient descent method (GD)

GD: \(x^{(k+1)} = x^{(k)}-t\nabla f(x^{(k)})\)

Convergence analysis

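\(1^{\circ}\). Assume \(f\) is convex and \(L\)-smooth with minimizer \(x^*\) (so \(\nabla f(x^*)=0\)), and suppose \(x^{(k+1)} = x^{(k)}-\eta\nabla f(x^{(k)})\). The proofs below use two consequences of convexity and \(L\)-smoothness: \(f(x)-f(x^*)+\frac{1}{2L}\|\nabla f(x)\|^2\leq \nabla f(x)^T(x-x^*)\) and \(\frac{1}{L}\|\nabla f(x)\|^2\leq \nabla f(x)^T(x-x^*)\).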

Soln 1.

\[\begin{aligned} &f(x^{(k+1)})-f(x^*)\\ \leq &f(x^{(k)})-f(x^*)+\nabla f(x^{(k)})^T(x^{(k+1)}-x^{(k)})+\frac{L}{2}\|x^{(k+1)}-x^{(k)}\|^2\\ \leq & \nabla f(x^{(k)})^T(x^{(k+1)}-x^*)+\frac{L}{2}\|x^{(k+1)}-x^{(k)}\|^2-\frac{1}{2L}\|\nabla f(x^{(k)})\|^2\\ =& -\frac{1}{\eta}(x^{(k+1)}-x^{(k)})^T(x^{(k+1)}-x^*)+\frac{1}{2}(L-\frac{1}{L\eta^2})\|x^{(k+1)}-x^{(k)}\|^2\\ =& (-\frac{1}{\eta}+\frac{1}{2}(L-\frac{1}{L\eta^2}))\|x^{(k+1)}-x^*\|^2 + \frac{1}{2}(L-\frac{1}{L\eta^2})\|x^{(k)}-x^*\|^2\\ & + (\frac{1}{\eta}-L+\frac{1}{L\eta^2})(x^{(k)}-x^*)^T(x^{(k+1)}-x^*)\\ \end{aligned} \]

Let \(0<\eta\leq (\frac{1}{2}+\frac{\sqrt{5}}{2})\frac{1}{L}\), so that \(\frac{1}{\eta}-L+\frac{1}{L\eta^2}\geq 0\). Since \(a^Tb\leq\frac{1}{2}(\|a\|^2+\|b\|^2)\), we have \((\frac{1}{\eta}-L+\frac{1}{L\eta^2})(x^{(k)}-x^*)^T(x^{(k+1)}-x^*)\) \(\leq\) \(\frac{1}{2}(\frac{1}{\eta}-L+\frac{1}{L\eta^2})(\|x^{(k+1)}-x^*\|^2+\|x^{(k)}-x^*\|^2)\), then

\[f(x^{(k+1)})-f(x^*)\leq \frac{1}{2\eta}(\|x^{(k)}-x^*\|^2-\|x^{(k+1)}-x^*\|^2) \]

Thus

\[\sum_{k=1}^T [f(x^{(k)})-f(x^*)]\leq \frac{1}{2\eta}(\|x^{(0)}-x^*\|^2-\|x^{(T)}-x^*\|^2)\leq\frac{1}{2\eta}\|x^{(0)}-x^*\|^2 \]

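Furthermore, since \(\eta\leq (\frac{1}{2}+\frac{\sqrt{5}}{2})\frac{1}{L}<\frac{2}{L}\) implies \(\frac{1}{\eta}-\frac{L}{2}>0\), the objective values are nonincreasing: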

\[\begin{aligned} &f(x^{(k+1)})-f(x^*)\\ \leq &f(x^{(k)})-f(x^*)+\nabla f(x^{(k)})^T(x^{(k+1)}-x^{(k)})+\frac{L}{2}\|x^{(k+1)}-x^{(k)}\|^2\\ =& f(x^{(k)})-f(x^*)-(\frac{1}{\eta}-\frac{L}{2})\|x^{(k+1)}-x^{(k)}\|^2\\ \leq& f(x^{(k)})-f(x^*) \end{aligned} \]

then, because \(f(x^{(T)})\leq f(x^{(k)})\) for all \(k\leq T\),

\[f(x^{(T)})-f(x^*)\leq\frac{1}{T}\sum_{k=1}^T [f(x^{(k)})-f(x^*)]\leq\frac{1}{2T\eta}\|x^{(0)}-x^*\|^2 \]

Soln 2.

\[\begin{aligned} &f(x^{(k+1)})-f(x^*)\\ \leq &f(x^{(k)})-f(x^*)+\nabla f(x^{(k)})^T(x^{(k+1)}-x^{(k)})+\frac{L}{2}\|x^{(k+1)}-x^{(k)}\|^2\\ =& f(x^{(k)})-f(x^*)+(-\eta+\frac{L}{2}\eta^2)\|\nabla f(x^{(k)})\|^2 \end{aligned} \]

Let \(\eta\in(0,\frac{2}{L})\), so that \(\eta-\frac{L}{2}\eta^2>0\) and hence \(f(x^{(k+1)})\leq f(x^{(k)})\).

lemma 1:

\[f(x^{(k)})-f(x^*)\leq \nabla f(x^{(k)})^T(x^{(k)}-x^*)\leq \|\nabla f(x^{(k)})\|\cdot\|x^{(k)}-x^*\| \]

lemma 2 (using \(\nabla f(x)^T(x-x^*)\geq\frac{1}{L}\|\nabla f(x)\|^2\) and \(\eta<\frac{2}{L}\)):

\[\begin{aligned} &\|x^{(k+1)}-x^*\|^2-\|x^{(k)}-x^*\|^2 \\ =& -2\eta \nabla f(x^{(k)})^T(x^{(k)}-x^*)+\eta^2\|\nabla f(x^{(k)})\|^2\\ \leq& (-\frac{2\eta}{L}+\eta^2)\|\nabla f(x^{(k)})\|^2\leq 0 \end{aligned} \]

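Thus, applying lemma 1 in the second step, and lemma 2 (\(\|x^{(k)}-x^*\|\leq\|x^{(0)}-x^*\|\)) together with \(f(x^{(k+1)})\leq f(x^{(k)})\) in the third,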

\[\begin{aligned} &f(x^{(k+1)})-f(x^*)\\ \leq& f(x^{(k)})-f(x^*)+(-\eta+\frac{L}{2}\eta^2)\|\nabla f(x^{(k)})\|^2\\ \leq& f(x^{(k)})-f(x^*)-\frac{\eta-\frac{L}{2}\eta^2}{\|x^{(k)}-x^*\|^2}(f(x^{(k)})-f(x^*))^2\\ \leq& f(x^{(k)})-f(x^*)-\frac{\eta-\frac{L}{2}\eta^2}{\|x^{(0)}-x^*\|^2}(f(x^{(k)})-f(x^*))(f(x^{(k+1)})-f(x^*))\\ \end{aligned} \]

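Then, dividing by \((f(x^{(k)})-f(x^*))(f(x^{(k+1)})-f(x^*))\) (assumed positive; otherwise the bound is trivial) and summing over \(k=0,\dots,T-1\),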

\[\begin{aligned} &\frac{1}{f(x^{(k)})-f(x^*)}\leq \frac{1}{f(x^{(k+1)})-f(x^*)}-\frac{\eta-\frac{L}{2}\eta^2}{\|x^{(0)}-x^*\|^2}\\ \implies\quad & f(x^{(T)})-f(x^*)\leq\frac{1}{T(\eta-\frac{L}{2}\eta^2)}\|x^{(0)}-x^*\|^2 \end{aligned} \]

Soln 3.

\[\begin{aligned} &\|x^{(k+1)}-x^*\|^2-\|x^{(k)}-x^*\|^2 \\ =& -2\eta \nabla f(x^{(k)})^T(x^{(k)}-x^*)+\eta^2\|\nabla f(x^{(k)})\|^2\\ = & -\eta_1 \nabla f(x^{(k)})^T(x^{(k)}-x^*)-\eta_2 \nabla f(x^{(k)})^T(x^{(k)}-x^*)+\eta^2\|\nabla f(x^{(k)})\|^2\\ \leq & -\eta_1(f(x^{(k)})-f(x^*)+\frac{1}{2L}\|\nabla f(x^{(k)})\|^2)+(-\frac{\eta_2}{L}+\eta^2)\|\nabla f(x^{(k)})\|^2\\ =& -\eta_1(f(x^{(k)})-f(x^*))+(-\frac{\eta}{L}-\frac{\eta_2}{2L}+\eta^2)\|\nabla f(x^{(k)})\|^2 \end{aligned} \]

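where \(\eta_1+\eta_2=2\eta,~\eta_1,\eta_2\geq 0\) (nonnegativity is needed for the inequality step above). To drop the gradient term we need \(-\frac{\eta}{L}+\eta^2\leq\frac{\eta_2}{2L}\), and \(\eta_1>0\) requires \(\frac{\eta_2}{2L}<\frac{\eta}{L}\). For \(\eta\in[\frac{1}{L},\frac{2}{L})\), let \(\frac{\eta_2}{2L}=-\frac{\eta}{L}+\eta^2\) (which is \(\geq 0\) exactly when \(\eta\geq\frac{1}{L}\)), then \(\eta_1=2\eta-\eta_2=2\eta(2-L\eta)\in (0,\frac{2}{L}]\). For \(\eta\in(0,\frac{1}{L})\) this choice would make \(\eta_2<0\), so instead take \(\eta_2=0\), \(\eta_1=2\eta\in(0,\frac{2}{L})\), for which the gradient coefficient \(-\frac{\eta}{L}+\eta^2\) is negative. In both cases,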

\[\begin{aligned} f(x^{(k)})-f(x^*)\leq \frac{1}{\eta_1}(\|x^{(k)}-x^*\|^2-\|x^{(k+1)}-x^*\|^2) \end{aligned} \]

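Similar to soln 1, summing over \(k=0,\dots,T-1\) and using \(f(x^{(T)})\leq f(x^{(k)})\) for \(k\leq T\) (valid since \(\eta<\frac{2}{L}\)),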

\[f(x^{(T)})-f(x^*)\leq\frac{1}{T\eta_1}\|x^{(0)}-x^*\|^2 \]

posted @ 2025-08-28 20:22  p0q  阅读(9)  评论(0)    收藏  举报