# 一、前言

EM算法主要针对概率生成模型解决具有隐变量的混合模型的参数估计问题。

E步解决隐变量的问题，M步求解模型的参数值，也就是用极大似然的方法求取模型的参数值。

# 二、概览

$\arg\max_{\theta} logP(X|\theta)$

$\theta^{(t+1)} = \arg\max_{\theta} \int_Z \; P(Z|X,\theta^{(t)})\;logP(X,Z|\theta)\,dZ$

$P(X)=\int_{Z}P(X,Z)dZ$，把 $P(X)$ 分解处理。

# 三、收敛性

\begin{aligned} logP(X|\theta) &=log \frac {P(X,Z|\theta)}{P(Z|X,\theta)}\\ &=log {P(X,Z|\theta)} - log{P(Z|X,\theta)} \end{aligned}

\begin{aligned} 左边&=\int_{Z}P(Z|X,\theta^{(t)}) log {P(X|\theta)} dZ\\ &=log {P(X|\theta)} \int_{Z}P(Z|X,\theta^{(t)})dZ\\ &=log {P(X|\theta)} \end{aligned}

\begin{aligned} 右边&=\int_{Z}P(Z|X,\theta^{(t)}) \left[log {P(X,Z|\theta)} - log{P(Z|X,\theta)}\right] dZ\\ &=\int_{Z}P(Z|X,\theta^{(t)}) log {P(X,Z|\theta)} dZ - \int_{Z}P(Z|X,\theta^{(t)}) log{P(Z|X,\theta)} dZ\\ &Q(\theta,\theta^{(t)}) = \int_{Z}P(Z|X,\theta^{(t)}) log {P(X,Z|\theta)} dZ\\ &H(\theta,\theta^{(t)}) = \int_{Z}P(Z|X,\theta^{(t)}) log{P(Z|X,\theta)} dZ\\ &log {P(X|\theta^{(t+1)})} -log {P(X|\theta^{(t)})} = Q(\theta^{(t+1)},\theta^{(t)}) -Q(\theta^{(t)},\theta^{(t)}) + H(\theta^{(t)},\theta^{(t)}) -H(\theta^{(t+1)},\theta^{(t)}) \end{aligned}

$Q(\theta^{(t+1)},\theta^{(t)})\geq Q(\theta,\theta^{(t)})$

$Q(\theta^{(t+1)},\theta^{(t)})\geq Q(\theta^{(t)},\theta^{(t)})$

\begin{aligned} H(\theta^{(t)},\theta^{(t)}) -H(\theta^{(t+1)},\theta^{(t)})&=\int_{Z}P(Z|X,\theta^{(t)}) log{P(Z|X,\theta^{(t)})}dZ- \int_{Z}P(Z|X,\theta^{(t)}) log{P(Z|X,\theta^{(t+1)})}dZ\\ &=\int_{Z}P(Z|X,\theta^{(t)}) [log{P(Z|X,\theta^{(t)})}-log{P(Z|X,\theta^{(t+1)})}]dZ\\ &=\int_{Z}P(Z|X,\theta^{(t)})log \frac{P(Z|X,\theta^{(t)})}{P(Z|X,\theta^{(t+1)})}dZ\\ &=KL(P(Z|X,\theta^{(t)}) \;||\; P(Z|X,\theta^{(t+1)})) \geq 0 \end{aligned}

$log {P(X|\theta^{(t+1)})} \geq log {P(X|\theta^{(t)})}$

# 四、完整的推导

Jensen不等式：当 $f$ 是凹函数时：$$f(E[x]) \geq E[f(x)]$$

\begin{aligned} log {P(X|\theta)} &= log \int_{Z}P(X,Z|\theta)dZ=log \int_{Z} \frac{P(X,Z|\theta)}{q(Z)}q(Z)dZ\\ &=log E_{q(Z)}[\frac{P(X,Z|\theta)}{q(Z)}] \geq E_{q(Z)}[log \frac{P(X,Z|\theta)}{q(Z)}] \end{aligned}

\begin{aligned} log {P(X|\theta)} &=log\frac{P(X,Z|\theta)}{P(Z|X,\theta)}\\ &=log P(X,Z|\theta) - log P(Z|X,\theta)\\ &=log \frac {P(X,Z|\theta)}{q(Z)} - log \frac {P(Z|X,\theta)}{q(Z)} \end{aligned}

\begin{aligned} 右边 &= \int_{Z}q(Z)log \frac {P(X,Z|\theta)}{q(Z)}dZ-\int_{Z}q(Z)log \frac {P(Z|X,\theta)}{q(Z)}dZ\\ & = ELBO+KL(q(Z)\;||\;P(Z|X,\theta)) \end{aligned}

$q(Z)$ 与 $P(Z|X,\theta)$ 同分布时取等号。

E步：找到一个 $q(Z) = P(Z|X,\theta^{(t)})$，即令 $q$ 等于当前参数下隐变量的后验分布
M步：(多种不同的形式)

\begin{aligned} &\arg\max_{\theta} \int_{Z}q(Z)log \frac {P(X,Z|\theta)}{q(Z)}dZ \\ &\arg\max_{\theta}\int_{Z}q(Z)log {P(X,Z|\theta)}dZ\\ &\arg\max_{\theta}\int_{Z}P(Z|X,\theta^{(t)})log {P(X,Z|\theta)}dZ\\ &\arg\max_{\theta}\sum_{Z}P(Z|X,\theta^{(t)})log {P(X,Z|\theta)} \end{aligned}

当 $q(Z) = P(Z|X,\theta)$ 时 KL 项为 0，此时 $log P(X|\theta) = ELBO$

# 五、广义EM

$logP(X|\theta) = ELBO+KL(q(Z)||P(Z|X,\theta))$

$L(q,\theta)=ELBO = E_{q(Z)}[ log\frac{P(X,Z|\theta)} {q(Z)}]$

$logP(X|\theta) = E_{q(Z)}[ log{P(X,Z|\theta)}]-E_{q(Z)}[log \;q(Z)]+KL(q(Z)||P(Z|X,\theta))$
$= E_{q(Z)}[ log{P(X,Z|\theta)}]+H(q(Z))+KL(q(Z)||P(Z|X,\theta))$
$= E_{q(Z)}[ log{P(X,Z|\theta)}]+H(q(Z),P(Z|X,\theta))$

E-step:固定 $\theta$,找出q,此时 $logP(X|\theta)$是定值：

\begin{aligned} q^{(t+1)} &= \arg\min_{q}KL(q||P) = \arg\max_{q}ELBO\\& = \arg\max_{q} L(q,\theta^{(t)})\\&=\arg\max_{q} E_{q(Z)}[ log\frac{P(X,Z|\theta^{(t)})} {q(Z)}] \end{aligned}

M-step:固定 $q$,找出 $\theta$

\begin{aligned} \theta^{(t+1)} &= \arg\max_{\theta}ELBO =\arg\max_{\theta} L(q^{(t+1)},\theta)\\&= \arg\max_{\theta} E_{q^{(t+1)}(Z)}[ log\frac{P(X,Z|\theta)} {q^{(t+1)}(Z)}]\\&= \arg\max_{\theta} E_{q^{(t+1)}(Z)}[ log{P(X,Z|\theta)} ] \end{aligned}

posted @ 2019-10-07 21:58  SpringC  阅读(220)  评论(0)  编辑  收藏