#-----------------------------------#
# R in Action (2nd ed): Chapter 18  #
# Advanced methods for missing data #
# requires packages VIM, mice       #
# install.packages(c("VIM", mice))  #
#-----------------------------------#

par(ask=TRUE)


# load the dataset
data(sleep, package="VIM")


# list the rows that do not have missing values
sleep[complete.cases(sleep),]


# list the rows that have one or more missing values
sleep[!complete.cases(sleep),]


# tabulate missing values patters
library(mice)
md.pattern(sleep)


# plot missing values patterns
library("VIM")
aggr(sleep, prop=FALSE, numbers=TRUE)
matrixplot(sleep)
marginplot(sleep[c("Gest","Dream")], pch=c(20), 
           col=c("darkgray", "red", "blue"))


# use correlations to explore missing values
x <- as.data.frame(abs(is.na(sleep)))
head(sleep, n=5)
head(x, n=5)
y <- x[which(apply(x,2,sum)>0)]
cor(y)
cor(sleep, y, use="pairwise.complete.obs")


# complete case analysis (listwise deletion)
options(digits=1)
cor(na.omit(sleep))
fit <- lm(Dream ~ Span + Gest, data=na.omit(sleep))
summary(fit)


# multiple imputation
options(digits=3)
library(mice)
data(sleep, package="VIM")
imp <- mice(sleep, seed=1234)
fit <- with(imp, lm(Dream ~ Span + Gest))
pooled <- pool(fit)
summary(pooled)
imp