
Descent methods
Descent methods produce a minimizing sequence \(x^{(k)}\),
\[x^{(k+1)} = x^{(k)}+t^{(k)}\Delta x^{(k)}\quad f(x^{(k+1)})<f(x^{(k)})
\]
Gradient descent method (GD)
GD: \(x^{(k+1)} = x^{(k)}-t\nabla f(x^{(k)})\)
Convergence analysis
\(1^{\circ}\). Assume \(f\) is convex and \(L\)-smooth with minimizer \(x^*\), and suppose \(x^{(k+1)} = x^{(k)}-\eta\nabla f(x^{(k)})\) with a constant step size \(\eta>0\).
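Soln 1. The first inequality below is \(L\)-smoothness; the second uses \(f(x^{(k)})-f(x^*)\leq \nabla f(x^{(k)})^T(x^{(k)}-x^*)-\frac{1}{2L}\|\nabla f(x^{(k)})\|^2\), which holds for convex \(L\)-smooth \(f\) because \(\nabla f(x^*)=0\).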
\[\begin{aligned}
&f(x^{(k+1)})-f(x^*)\\
\leq &f(x^{(k)})-f(x^*)+\nabla f(x^{(k)})^T(x^{(k+1)}-x^{(k)})+\frac{L}{2}\|x^{(k+1)}-x^{(k)}\|^2\\
\leq & \nabla f(x^{(k)})^T(x^{(k+1)}-x^*)+\frac{L}{2}\|x^{(k+1)}-x^{(k)}\|^2-\frac{1}{2L}\|\nabla f(x^{(k)})\|^2\\
=& -\frac{1}{\eta}(x^{(k+1)}-x^{(k)})^T(x^{(k+1)}-x^*)+\frac{1}{2}(L-\frac{1}{L\eta^2})\|x^{(k+1)}-x^{(k)}\|^2\\
=& (-\frac{1}{\eta}+\frac{1}{2}(L-\frac{1}{L\eta^2}))\|x^{(k+1)}-x^*\|^2 + \frac{1}{2}(L-\frac{1}{L\eta^2})\|x^{(k)}-x^*\|^2\\
& + (\frac{1}{\eta}-L+\frac{1}{L\eta^2})(x^{(k)}-x^*)^T(x^{(k+1)}-x^*)\\
\end{aligned}
\]
Let \(0<\eta\leq (\frac{1}{2}+\frac{\sqrt{5}}{2})\frac{1}{L}\), so that \(\frac{1}{\eta}-L+\frac{1}{L\eta^2}\geq 0\). Then, by \(2a^Tb\leq\|a\|^2+\|b\|^2\), \((\frac{1}{\eta}-L+\frac{1}{L\eta^2})(x^{(k)}-x^*)^T(x^{(k+1)}-x^*)\leq\frac{1}{2}(\frac{1}{\eta}-L+\frac{1}{L\eta^2})(\|x^{(k+1)}-x^*\|^2+\|x^{(k)}-x^*\|^2)\), and therefore
\[f(x^{(k+1)})-f(x^*)\leq \frac{1}{2\eta}(\|x^{(k)}-x^*\|^2-\|x^{(k+1)}-x^*\|^2)
\]
Thus, summing over \(k=0,\dots,T-1\) and telescoping,
\[\sum_{k=1}^T [f(x^{(k)})-f(x^*)]\leq \frac{1}{2\eta}(\|x^{(0)}-x^*\|^2-\|x^{(T)}-x^*\|^2)\leq\frac{1}{2\eta}\|x^{(0)}-x^*\|^2
\]
Furthermore, since \(\eta\leq\frac{1+\sqrt{5}}{2L}<\frac{2}{L}\), the objective values are nonincreasing:
\[\begin{aligned}
&f(x^{(k+1)})-f(x^*)\\
\leq &f(x^{(k)})-f(x^*)+\nabla f(x^{(k)})^T(x^{(k+1)}-x^{(k)})+\frac{L}{2}\|x^{(k+1)}-x^{(k)}\|^2\\
=& f(x^{(k)})-f(x^*)-(\frac{1}{\eta}-\frac{L}{2})\|x^{(k+1)}-x^{(k)}\|^2\\
\leq& f(x^{(k)})-f(x^*)
\end{aligned}
\]
Since \(f(x^{(k)})\) is nonincreasing in \(k\), the last iterate is no worse than the average, so
\[f(x^{(T)})-f(x^*)\leq\frac{1}{T}\sum_{k=1}^T [f(x^{(k)})-f(x^*)]\leq\frac{1}{2T\eta}\|x^{(0)}-x^*\|^2
\]
Soln 2.
\[\begin{aligned}
&f(x^{(k+1)})-f(x^*)\\
\leq &f(x^{(k)})-f(x^*)+\nabla f(x^{(k)})^T(x^{(k+1)}-x^{(k)})+\frac{L}{2}\|x^{(k+1)}-x^{(k)}\|^2\\
=& f(x^{(k)})-f(x^*)+(-\eta+\frac{L}{2}\eta^2)\|\nabla f(x^{(k)})\|^2
\end{aligned}
\]
Let \(\eta\in(0,\frac{2}{L})\), so that \(-\eta+\frac{L}{2}\eta^2<0\) and \(f(x^{(k)})\) is nonincreasing.
Lemma 1 (convexity, then Cauchy--Schwarz):
\[f(x^{(k)})-f(x^*)\leq \nabla f(x^{(k)})^T(x^{(k)}-x^*)\leq \|\nabla f(x^{(k)})\|\cdot\|x^{(k)}-x^*\|
\]
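Lemma 2 (the inequality uses co-coercivity, \(\nabla f(x^{(k)})^T(x^{(k)}-x^*)\geq\frac{1}{L}\|\nabla f(x^{(k)})\|^2\), so the distance to \(x^*\) is nonincreasing):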
\[\begin{aligned}
&\|x^{(k+1)}-x^*\|^2-\|x^{(k)}-x^*\|^2 \\
=& -2\eta \nabla f(x^{(k)})^T(x^{(k)}-x^*)+\eta^2\|\nabla f(x^{(k)})\|^2\\
\leq& (-\frac{2\eta}{L}+\eta^2)\|\nabla f(x^{(k)})\|^2\leq 0
\end{aligned}
\]
Thus,
\[\begin{aligned}
&f(x^{(k+1)})-f(x^*)\\
\leq& f(x^{(k)})-f(x^*)+(-\eta+\frac{L}{2}\eta^2)\|\nabla f(x^{(k)})\|^2\\
\leq& f(x^{(k)})-f(x^*)-\frac{\eta-\frac{L}{2}\eta^2}{\|x^{(k)}-x^*\|^2}(f(x^{(k)})-f(x^*))^2\\
\leq& f(x^{(k)})-f(x^*)-\frac{\eta-\frac{L}{2}\eta^2}{\|x^{(0)}-x^*\|^2}(f(x^{(k)})-f(x^*))(f(x^{(k+1)})-f(x^*))\\
\end{aligned}
\]
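Then, dividing both sides by \((f(x^{(k)})-f(x^*))(f(x^{(k+1)})-f(x^*))\) (if \(f(x^{(k+1)})=f(x^*)\) for some \(k\), the final bound holds trivially) and telescoping over \(k=0,\dots,T-1\),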
\[\begin{aligned}
&\frac{1}{f(x^{(k)})-f(x^*)}\leq \frac{1}{f(x^{(k+1)})-f(x^*)}-\frac{\eta-\frac{L}{2}\eta^2}{\|x^{(0)}-x^*\|^2}\\
\implies\quad & f(x^{(T)})-f(x^*)\leq\frac{1}{T(\eta-\frac{L}{2}\eta^2)}\|x^{(0)}-x^*\|^2
\end{aligned}
\]
Soln 3.
\[\begin{aligned}
&\|x^{(k+1)}-x^*\|^2-\|x^{(k)}-x^*\|^2 \\
=& -2\eta \nabla f(x^{(k)})^T(x^{(k)}-x^*)+\eta^2\|\nabla f(x^{(k)})\|^2\\
= & -\eta_1 \nabla f(x^{(k)})^T(x^{(k)}-x^*)-\eta_2 \nabla f(x^{(k)})^T(x^{(k)}-x^*)+\eta^2\|\nabla f(x^{(k)})\|^2\\
\leq & -\eta_1(f(x^{(k)})-f(x^*)+\frac{1}{2L}\|\nabla f(x^{(k)})\|^2)+(-\frac{\eta_2}{L}+\eta^2)\|\nabla f(x^{(k)})\|^2\\
=& -\eta_1(f(x^{(k)})-f(x^*))+(-\frac{\eta}{L}-\frac{\eta_2}{2L}+\eta^2)\|\nabla f(x^{(k)})\|^2
\end{aligned}
\]
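where \(\eta_1+\eta_2=2\eta,~\eta_1,\eta_2\geq 0\), and we want \(-\frac{\eta}{L}+\eta^2\leq\frac{\eta_2}{2L}<\frac{\eta}{L}\) so that the coefficient of \(\|\nabla f(x^{(k)})\|^2\) is nonpositive and \(\eta_1>0\). For \(\eta\in[\frac{1}{L},\frac{2}{L})\), let \(\frac{\eta_2}{2L}=-\frac{\eta}{L}+\eta^2\) (this needs \(\eta\geq\frac{1}{L}\) to keep \(\eta_2\geq 0\)), then \(\eta_1=2\eta-\eta_2=2\eta(2-L\eta)\in(0,\frac{2}{L}]\); for \(\eta\in(0,\frac{1}{L}]\), take \(\eta_2=0\) and \(\eta_1=2\eta\in(0,\frac{2}{L}]\). In either case the \(\|\nabla f(x^{(k)})\|^2\) term can be dropped, hence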
\[\begin{aligned}
f(x^{(k)})-f(x^*)\leq \frac{1}{\eta_1}(\|x^{(k)}-x^*\|^2-\|x^{(k+1)}-x^*\|^2)
\end{aligned}
\]
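Similar to soln 1, summing over \(k=0,\dots,T-1\) and using that \(f(x^{(k)})\) is nonincreasing for \(\eta\in(0,\frac{2}{L})\),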
\[f(x^{(T)})-f(x^*)\leq\frac{1}{T\eta_1}\|x^{(0)}-x^*\|^2
\]