Simple Linear Regression

The simple linear regression model:

\[y=a+b x+\epsilon \]

\[\begin{aligned} Q \equiv Q(a, b) &=\sum_{i=1}^{n} e_{i}^{2} \\ &=\sum_{i=1}^{n}\left(y_{i}-\hat{y}_{i}\right)^{2} \\ &=\sum_{i=1}^{n}\left(y_{i}-a-b x_{i}\right)^{2} \end{aligned} \]
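
As a minimal sketch (toy data, all values hypothetical), the objective \(Q(a,b)\) is straightforward to evaluate; OLS returns its minimizer:

```python
import numpy as np

# Toy data (hypothetical values), n = 5 observations
x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
y = np.array([2.1, 3.9, 6.2, 8.1, 9.8])

def Q(a, b):
    """Residual sum of squares: Q(a, b) = sum_i (y_i - a - b*x_i)^2."""
    return np.sum((y - a - b * x) ** 2)

print(Q(0.0, 2.0))  # objective at one candidate (a, b)
```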

\[\begin{aligned} a &=\bar{y}-b \bar{x} \\ &=\sum_{i=1}^{n}\left[\frac{1}{n}-\frac{\bar{x}\left(x_{i}-\bar{x}\right)}{\sum_{j=1}^{n}\left(x_{j}-\bar{x}\right)^{2}}\right] y_{i} \end{aligned} \]

\[\begin{aligned} \operatorname{Var}(a) &=\sum_{i=1}^{n}\left[\frac{1}{n}-\frac{\bar{x}\left(x_{i}-\bar{x}\right)}{\sum_{j=1}^{n}\left(x_{j}-\bar{x}\right)^{2}}\right]^{2} \operatorname{Var}\left(y_{i}\right) \\ &=\left[\frac{1}{n}+\frac{\bar{x}^{2}}{\sum_{i=1}^{n}\left(x_{i}-\bar{x}\right)^{2}}\right] \sigma^{2} \end{aligned} \]
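
A quick numerical check (simulated data, hypothetical parameter values) that the weight representation above reproduces the intercept \(a=\bar{y}-b\bar{x}\):

```python
import numpy as np

rng = np.random.default_rng(0)
n = 20
x = rng.uniform(0.0, 10.0, size=n)
y = 1.0 + 2.0 * x + rng.normal(0.0, 0.5, size=n)  # hypothetical true model

Sxx = np.sum((x - x.mean()) ** 2)
b = np.sum((x - x.mean()) * (y - y.mean())) / Sxx  # OLS slope
w = 1.0 / n - x.mean() * (x - x.mean()) / Sxx      # weights from the formula above
print(np.dot(w, y), y.mean() - b * x.mean())       # both give the intercept a
```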

More generally, the multiple linear regression model is

\[y=\beta_{0}+\beta_{1} x_{1}+\beta_{2} x_{2}+\ldots+\beta_{p} x_{p}+\varepsilon \]

Gauss–Markov conditions:

\[\begin{array}{l} E\left(\varepsilon_{i}\right)=0, \operatorname{Var}\left(\varepsilon_{i}\right)=\sigma^{2}, \quad i=1, \ldots, n \\ \operatorname{Cov}\left(\varepsilon_{i}, \varepsilon_{j}\right)=0, \quad i \neq j, \quad i, j=1, \ldots, n \end{array}\]

Normality conditions:

\[\left\{\begin{array}{l} \varepsilon_{i} \sim N\left(0, \sigma^{2}\right) \\ \varepsilon_{1}, \varepsilon_{2}, \ldots, \varepsilon_{n} \quad \text { mutually independent } \end{array}\right.\]
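
As a concrete illustration (all parameter values hypothetical), data satisfying both sets of conditions can be simulated directly:

```python
import numpy as np

rng = np.random.default_rng(0)
n, beta0, beta1, sigma = 100, 1.0, 2.0, 0.5  # hypothetical parameter values
x = rng.uniform(0.0, 10.0, size=n)           # regressors, treated as fixed
eps = rng.normal(0.0, sigma, size=n)         # i.i.d. N(0, sigma^2): Gauss-Markov + normality
y = beta0 + beta1 * x + eps                  # responses under the model
```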


Matrix representation of simple linear regression

\[y_{i}=\beta_{0}+\beta_{1} x_{i}+\varepsilon_{i} \]

\[\begin{array}{l} \boldsymbol{y}=\left(y_{1}, \ldots, y_{n}\right)^{\top}, \quad \mathbf{1}=(1, \ldots, 1)^{\top} \\ \boldsymbol{x}=\left(x_{1}, \ldots, x_{n}\right)^{\top}, \quad \boldsymbol{X}=(\mathbf{1}, \boldsymbol{x})_{n \times 2} \\ \varepsilon=\left(\varepsilon_{1}, \ldots, \varepsilon_{n}\right)^{\top}, \quad \boldsymbol{\beta}=\left(\beta_{0}, \beta_{1}\right)^{\top} \end{array}\]

\[\left\{\begin{array}{l} y=X \beta+\varepsilon \\ E(\varepsilon)=0 \\ \operatorname{Var}(\varepsilon)=\sigma^{2} I_{n} \end{array}\right.\]

\[Q\left(\beta_{0}, \beta_{1}\right)=\sum_{i=1}^{n}\left(y_{i}-\beta_{0}-\beta_{1} x_{i}\right)^{2} \]

\[\hat{\boldsymbol{\beta}}=\left(\hat{\beta}_{0}, \hat{\beta}_{1}\right)^{\top} \]

\[\hat{\boldsymbol{\beta}}=\arg \min \limits_{\boldsymbol{\beta} \in R^{2}} Q(\boldsymbol{\beta}) \]
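
In matrix form the minimizer is \(\hat{\boldsymbol{\beta}}=(\boldsymbol{X}^{\top}\boldsymbol{X})^{-1}\boldsymbol{X}^{\top}\boldsymbol{y}\). A minimal NumPy sketch (simulated data, hypothetical parameters; `np.linalg.solve` is used rather than forming an explicit inverse):

```python
import numpy as np

rng = np.random.default_rng(0)
n = 100
x = rng.uniform(0.0, 10.0, size=n)
y = 1.0 + 2.0 * x + rng.normal(0.0, 0.5, size=n)  # hypothetical true model

X = np.column_stack([np.ones(n), x])          # X = (1, x), an n x 2 matrix
beta_hat = np.linalg.solve(X.T @ X, X.T @ y)  # solves (X'X) beta = X'y
print(beta_hat)                               # approximately (beta0, beta1)
```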

\[\left\{\begin{array}{l} \frac{\partial Q}{\partial \beta_{0}}=-2\sum_{i=1}^{n}\left(y_{i}-\beta_{0}-\beta_{1} x_{i}\right)=0 \\ \frac{\partial Q}{\partial \beta_{1}}=-2\sum_{i=1}^{n} x_{i}\left(y_{i}-\beta_{0}-\beta_{1} x_{i}\right)=0 \end{array}\right.\]

Simplifying:

\[\left\{\begin{array}{ll} \hat{\beta_{0}}+\bar{x} \hat{\beta_{1}} & =\bar{y} \\ \bar{x} \hat{\beta_{0}}+\frac{\sum_{i=1}^{n} x_{i}^{2}}{n} \hat{\beta_{1}} & =\frac{\sum\limits_{i=1}^{n} x_{i} y_{i}}{n} \end{array}\right.\]

\[\Rightarrow \sum x_iy_i-n\bar{x}\bar{y}=\hat{\beta}_1\left(\sum x_i^2-n\bar{x}^2\right) \]

\[S_{xx}=\sum\limits_{i=1}^{n}\left(x_{i}-\bar{x}\right)^{2}=\sum\limits_{i=1}^{n}x_i^2-n\bar{x}^2, S_{x y}=\sum\limits_{i=1}^{n} x_{i} y_{i}-n \bar{x} \bar{y} \]

\[\begin{array}{l} \hat{\beta}_{0}=\bar{y}-\hat{\beta}_{1} \bar{x} \\ \hat{\beta}_{1}=\frac{S_{x y}}{S_{x x}} \end{array}\]
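
A sketch of the closed-form formulas on simulated data (all values hypothetical); the result should match a standard fit such as `np.polyfit`:

```python
import numpy as np

rng = np.random.default_rng(0)
n = 100
x = rng.uniform(0.0, 10.0, size=n)
y = 1.0 + 2.0 * x + rng.normal(0.0, 0.5, size=n)  # hypothetical true model

Sxx = np.sum((x - x.mean()) ** 2)                 # = sum x_i^2 - n*xbar^2
Sxy = np.sum(x * y) - n * x.mean() * y.mean()
beta1_hat = Sxy / Sxx
beta0_hat = y.mean() - beta1_hat * x.mean()
print(beta0_hat, beta1_hat)  # should agree with np.polyfit(x, y, 1), order reversed
```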


Properties:

Linearity:
Definition: an estimator that is a linear function of the random variables \(\left\{y_{i}, i=1, \ldots, n\right\}\) is called a linear estimator.

In the derivation below, lowercase letters denote deviations from the sample means (\(x_{i}=X_{i}-\bar{X}\), \(y_{i}=Y_{i}-\bar{Y}\)):

\[\begin{aligned} \hat{\beta}_{1} &=\frac{\sum x_{i} y_{i}}{\sum x_{i}^{2}}=\frac{\sum x_{i}\left(Y_{i}-\bar{Y}\right)}{\sum x_{i}^{2}} \quad (\text{splitting the sum})\\ &=\frac{\sum x_{i} Y_{i}}{\sum x_{i}^{2}}-\frac{\bar{Y} \sum x_{i}}{\sum x_{i}^{2}}=\frac{\sum x_{i} Y_{i}}{\sum x_{i}^{2}}=\sum k_{i} Y_{i}\end{aligned}\]

where \(k_{i}=\frac{x_{i}}{\sum_{j=1}^{n} x_{j}^{2}}=\frac{X_{i}-\bar{X}}{\sum_{j=1}^{n}\left(X_{j}-\bar{X}\right)^{2}}\), so \(\hat{\beta}_{1}\) is linear in the \(Y_{i}\). The cross term drops out because

\[\sum x_{i}=\sum\left(X_{i}-\bar{X}\right)=\sum X_{i}-n \bar{X}=0 \]

Unbiasedness:

\(\bar{x} =\frac{1}{n}\sum_{i=1}^{n}x_i\)

Note: it is incorrect to write \(\hat{\beta}_{1}=\frac{S_{x y}}{S_{x x}}=\frac{y_{i}-\bar{y}}{x_{i}-\bar{x}}=\beta_{1}\) and then conclude \(E(\hat{\beta}_{1})=E(\beta_{1})=\beta_{1}\): the ratio of sums does not cancel termwise, and \(\hat{\beta}_{1}\) is a random variable, not the constant \(\beta_{1}\). The correct argument writes \(\hat{\beta}_{1}=\sum b_{i} y_{i}\) with \(b_{i}=\frac{x_{i}-\bar{x}}{\sum_{j}\left(x_{j}-\bar{x}\right)^{2}}\):

\[\begin{aligned} E\left(\hat{\beta}_{1}\right) &=E\left(\sum b_{i} y_{i}\right)=\sum b_{i} E\left(y_{i}\right)=\sum b_{i}\left(\beta_{0}+\beta_{1} x_{i}\right) \\ &=\sum \frac{x_{i}-\bar{x}}{\sum\left(x_{i}-\bar{x}\right)^{2}}\left(\beta_{0}+\beta_{1} x_{i}\right) \\ &=\beta_{0} \sum \frac{x_{i}-\bar{x}}{\sum\left(x_{i}-\bar{x}\right)^{2}}+\beta_{1} \sum \frac{\left(x_{i}-\bar{x}\right) x_{i}}{\sum\left(x_{i}-\bar{x}\right)^{2}}=\beta_{1} \end{aligned} \]

\[\begin{array}{l} \operatorname{Var}\left(\hat{\beta}_{1}\right)=\operatorname{Var}\left(\sum b_{i} y_{i}\right)=\sum \operatorname{Var}\left(b_{i} y_{i}\right)=\sum b_{i}^{2} \operatorname{Var}\left(y_{i}\right)=\sum\left(\frac{x_{i}-\bar{x}}{\sum\left(x_{i}-\bar{x}\right)^{2}}\right)^{2} \sigma^{2} \\ =\sigma^{2} \frac{\sum\left(x_{i}-\bar{x}\right)^{2}}{\left[\sum\left(x_{i}-\bar{x}\right)^{2}\right]^{2}}=\frac{\sigma^{2}}{\sum\left(x_{i}-\bar{x}\right)^{2}}=\frac{\sigma^{2}}{S_{x x}} \end{array} \]

\[\begin{aligned} \hat{\beta}_{0} &=\bar{y}-\hat{\beta}_{1} \bar{x}=\frac{1}{n} \mathbf{1}^{\prime} \boldsymbol{y}-\bar{x} \boldsymbol{c}^{\prime} \boldsymbol{y}, \quad c_{i}=b_{i} \\ \operatorname{Var}\left(\hat{\beta}_{0}\right) &=\operatorname{Var}\left(\frac{1}{n} \mathbf{1}^{\prime} \boldsymbol{y}\right)+\operatorname{Var}\left(\bar{x} \boldsymbol{c}^{\prime} \boldsymbol{y}\right)-2 \operatorname{Cov}\left(\frac{1}{n} \mathbf{1}^{\prime} \boldsymbol{y}, \bar{x} \boldsymbol{c}^{\prime} \boldsymbol{y}\right) \\ &=\frac{\sigma^{2}}{n}+\bar{x}^{2} \frac{\sigma^{2}}{S_{x x}}-0=\left(\frac{1}{n}+\frac{\bar{x}^{2}}{S_{x x}}\right) \sigma^{2} \end{aligned} \]

since \(\operatorname{Var}\left(\boldsymbol{c}^{\prime} \boldsymbol{y}\right)=\sigma^{2} \boldsymbol{c}^{\prime} \boldsymbol{c}=\sigma^{2} / S_{x x}\) and the covariance term vanishes (\(\mathbf{1}^{\prime} \boldsymbol{c}=\sum_{i} c_{i}=0\)).
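
A Monte Carlo sketch (hypothetical design and parameter values) checking \(E(\hat{\beta}_1)=\beta_1\) and both variance formulas, \(\operatorname{Var}(\hat{\beta}_1)=\sigma^2/S_{xx}\) and \(\operatorname{Var}(\hat{\beta}_0)=(1/n+\bar{x}^2/S_{xx})\sigma^2\):

```python
import numpy as np

rng = np.random.default_rng(1)
n, beta0, beta1, sigma = 50, 1.0, 2.0, 0.5  # hypothetical values
x = rng.uniform(0.0, 10.0, size=n)          # fixed design, reused in every replication
Sxx = np.sum((x - x.mean()) ** 2)

b1, b0 = [], []
for _ in range(20000):
    y = beta0 + beta1 * x + rng.normal(0.0, sigma, size=n)
    s = np.sum((x - x.mean()) * (y - y.mean())) / Sxx
    b1.append(s)
    b0.append(y.mean() - s * x.mean())

print(np.mean(b1), beta1)                                # unbiasedness of beta1_hat
print(np.var(b1), sigma**2 / Sxx)                        # Var(beta1_hat)
print(np.var(b0), sigma**2 * (1/n + x.mean()**2 / Sxx))  # Var(beta0_hat)
```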


Best Linear Unbiased Estimator (BLUE)

An unbiased estimator \(\hat{\theta}\) of a parameter \(\theta\) is called the best linear unbiased estimator of \(\theta\) if it is linear and, for every linear unbiased estimator \(\tilde{\theta}\) of \(\theta\), \(\operatorname{Var}(\hat{\theta}) \leq \operatorname{Var}(\tilde{\theta})\).
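
To illustrate the definition, a small simulation (setup hypothetical) compares the OLS slope with another linear unbiased estimator of \(\beta_1\), the endpoint slope \((y_n-y_1)/(x_n-x_1)\); the Gauss–Markov theorem predicts OLS has the smaller variance:

```python
import numpy as np

rng = np.random.default_rng(2)
n, beta0, beta1, sigma = 30, 1.0, 2.0, 0.5  # hypothetical values
x = np.sort(rng.uniform(0.0, 10.0, size=n))
Sxx = np.sum((x - x.mean()) ** 2)

ols, endpoint = [], []
for _ in range(20000):
    y = beta0 + beta1 * x + rng.normal(0.0, sigma, size=n)
    ols.append(np.sum((x - x.mean()) * (y - y.mean())) / Sxx)
    endpoint.append((y[-1] - y[0]) / (x[-1] - x[0]))  # also linear and unbiased

print(np.var(ols), np.var(endpoint))  # OLS variance is much smaller
```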

Sum-of-squares decomposition

SST (total sum of squares):

\[SST=\sum_{i=1}^{n}\left(y_{i}-\bar{y}\right)^{2} \]

Tip: \(\sum_{i=1}^{n}\left(y_{i}-\bar{y}\right)^{2}=\sum_{i=1}^{n} y_{i}^{2}-n \bar{y}^{2}\)

SSR (sum of squares for regression):

\[SSR=\sum_{i=1}^{n}\left(\hat{y}_{i}-\bar{y}\right)^{2}=\sum\left(\hat{\beta}_{0}+\hat{\beta}_{1} x_{i}-\bar{y}\right)^{2}=\sum \hat{\beta}_{1}^{2}\left(x_{i}-\bar{x}\right)^{2}=\hat{\beta}_{1}^{2} S_{x x} \]

(using \(\hat{\beta}_{0}=\bar{y}-\hat{\beta}_{1} \bar{x}\))

SSE (sum of squares for error):

\[S S E=\sum_{i=1}^{n}\left(y_{i}-\hat{y}_{i}\right)^{2}=\sum_{i=1}^{n} e_{i}^{2} \]

\[\begin{aligned} y-\bar{y} &=(y-\hat{y})+(\hat{y}-\bar{y}) \\ \Rightarrow \sum(y-\bar{y})^{2} &=\sum(y-\hat{y})^{2}+\sum(\hat{y}-\bar{y})^{2}+2 \sum(y-\hat{y})(\hat{y}-\bar{y}) \\ \because \sum(y-\hat{y})(\hat{y}-\bar{y}) &=\sum(y-\hat{y})(a+b x-\bar{y}) \\ &=\sum(y-\hat{y})[(a-\bar{y})+b x] \\ &=(a-\bar{y}) \sum(y-\hat{y})+b \sum(y-\hat{y}) x \\ &=(a-\bar{y}) \sum(y-a-b x)+b \sum(y-a-b x) x \end{aligned} \]

By the principle of least squares, we have:

\[\Sigma(y-a-b x)=0, \quad \Sigma(y-a-b x) x=0 \]

\(\therefore \sum(y-\hat{y})(\hat{y}-\bar{y})=0\)
\(\therefore \sum(y-\bar{y})^{2}=\sum(y-\hat{y})^{2}+\sum(\hat{y}-\bar{y})^{2}\)

\[SST=SSR+SSE \]
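
A quick numerical check of the decomposition (toy simulated data, hypothetical values):

```python
import numpy as np

rng = np.random.default_rng(3)
n = 100
x = rng.uniform(0.0, 10.0, size=n)
y = 1.0 + 2.0 * x + rng.normal(0.0, 0.5, size=n)  # hypothetical true model

b1 = np.sum((x - x.mean()) * (y - y.mean())) / np.sum((x - x.mean()) ** 2)
b0 = y.mean() - b1 * x.mean()
y_hat = b0 + b1 * x

SST = np.sum((y - y.mean()) ** 2)
SSR = np.sum((y_hat - y.mean()) ** 2)
SSE = np.sum((y - y_hat) ** 2)
print(SST, SSR + SSE)  # equal up to floating-point error
```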


posted @ 2020-12-10 16:06  _OscarLi