# Session setup: install any missing packages, configure knitr and load the
# data file (the rest of the script expects it to provide `diabetes`).
# The original installed the same packages three times, passed the base
# package "parallel" to install.packages(), and called load() on two
# alternative locations one after the other (one of which necessarily fails).
required_pkgs <- c(
  "DescTools", "FactoMineR", "knitr", "mice", "micemd", "VIM", "mvtnorm"
)
missing_pkgs <- setdiff(required_pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

# Chunks are echoed but not evaluated.
knitr::opts_chunk$set(echo = TRUE, eval = FALSE)

# The data file is looked up next to the script first, then under Data/.
data_candidates <- c("diabetes.Rdata", "Data/diabetes.Rdata")
data_file <- data_candidates[file.exists(data_candidates)][1]
if (is.na(data_file)) {
  stop("diabetes.Rdata not found in '.' or 'Data/'", call. = FALSE)
}
load(data_file)
# Boxplots of the first eight columns of `diabetes`, split by Outcome,
# laid out on a 3 x 3 grid.
par(mfrow = c(3, 3))
# A plain loop is used because the calls are made only for their plotting
# side effect; sapply() additionally collected and printed the statistics
# returned by every boxplot() call.
for (var_name in colnames(diabetes)[1:8]) {
  boxplot(diabetes[[var_name]] ~ diabetes$Outcome,
          main = var_name,
          ylab = var_name,
          xlab = "outcome")
}
# Missing-value summary. aggr() is namespaced because library(VIM) is only
# attached further down in the script.
res.aggr <- VIM::aggr(diabetes)
library(mice)
library(micemd)
library(FactoMineR)
library(parallel)
library(DescTools)
library(VIM)
library(mvtnorm)
install.packages("missMDA")
# Summarise the missing-data pattern of `diabetes`.
res.aggr <- aggr(diabetes)
str(res.aggr)
res.aggr$missings
# Missingness combinations side by side with their percentages.
cbind(res.aggr$tabcomb, res.aggr$percent)
is.na(diabetes)
# Positions of the variables with at least one missing value, named after
# their columns. `missings` is spelled out in full: the original `$missing`
# only worked through partial matching of the element name.
var.na <- which(res.aggr$missings$Count > 0)
names(var.na) <- colnames(diabetes)[var.na]
# Missingness indicators of those variables and their pairwise Cramer's V.
pattern <- is.na(diabetes[, var.na])
matcram <- PairApply(pattern, CramerV)
PlotCorr(matcram)
# Scatterplots with missing-value information in the margins
marginmatrix(x = diabetes[c("Age", "Skin.Thick")])
marginmatrix(x = diabetes[c("Insulin", "Skin.Thick")])
# every pair of variables, last column left out
marginmatrix(x = diabetes[-ncol(diabetes)], cex = 0.2, gap = 0)
# Matrix plot of the data, rows sorted by the second column
matrixplot(x = diabetes, sortby = 2)
# MCA on a categorised copy of the data in which "missing" is a category.
diabetes.cat <- diabetes
quanti <- which(vapply(diabetes, is.numeric, logical(1)))
for (i in quanti) {
  # Quantile-based bins; the lowest break is replaced by -Inf so that the
  # minimum value falls inside the first bin.
  breaks <- c(-Inf, quantile(diabetes.cat[[i]], na.rm = TRUE)[-1])
  diabetes.cat[[i]] <- cut(diabetes.cat[[i]], breaks = breaks, labels = FALSE)
  # Bin codes become a factor; NA is kept as an explicit level when present.
  diabetes.cat[[i]] <- addNA(diabetes.cat[[i]], ifany = TRUE)
}
summary(diabetes.cat)

# Ventilation: the missing cells of Glucose and BMI are spread at random over
# the four bins. The number of draws is computed from the data instead of
# being hard-coded (the original used 5 and 11).
glucose.na <- is.na(diabetes$Glucose)
diabetes.cat$Glucose[glucose.na] <- sample(c("1", "2", "3", "4"),
                                           size = sum(glucose.na),
                                           replace = TRUE)
diabetes.cat$Glucose <- droplevels(diabetes.cat$Glucose)

bmi.na <- is.na(diabetes$BMI)
diabetes.cat$BMI[bmi.na] <- sample(c("1", "2", "3", "4"),
                                   size = sum(bmi.na),
                                   replace = TRUE)
diabetes.cat$BMI <- droplevels(diabetes.cat$BMI)
summary(diabetes.cat)

res.mca <- MCA(diabetes.cat, graph = FALSE, level.ventil = 0.04)
plot(res.mca, choix = "ind", invisible = "ind")
# Help page, then a first multiple imputation with 50 iterations
help("mice")
res.mice <- mice(data = diabetes, maxit = 50, printFlag = FALSE)
# First completed data set
tableau1 <- complete(res.mice, action = 1)
summary(tableau1)
# Diagnostic plots of the imputation object
plot(res.mice)
densityplot(res.mice)
# Imputation method retained for each variable
res.mice[["method"]]
library(mice)
library(micemd)
library(FactoMineR)
library(parallel)
library(DescTools)
library(VIM)
library(mvtnorm)
# Chunks are echoed but not evaluated.
knitr::opts_chunk$set(echo = TRUE, eval = FALSE)
# Load the data from whichever of the two known locations exists; calling
# load() on both paths unconditionally makes one of the calls fail.
data_candidates <- c("diabetes.Rdata", "Data/diabetes.Rdata")
data_file <- data_candidates[file.exists(data_candidates)][1]
if (is.na(data_file)) {
  stop("diabetes.Rdata not found in '.' or 'Data/'", call. = FALSE)
}
load(data_file)
# Summarise the missing-data pattern of `diabetes`.
res.aggr <- aggr(diabetes)
str(res.aggr)
res.aggr$missings
# Missingness combinations side by side with their percentages.
cbind(res.aggr$tabcomb, res.aggr$percent)
is.na(diabetes)
# Positions of the variables with at least one missing value. The names are
# assigned only after `var.na` has been computed (the original also tried to
# name it one line too early), and `missings` is spelled out instead of
# relying on partial matching of `$missing`.
var.na <- which(res.aggr$missings$Count > 0)
names(var.na) <- colnames(diabetes)[var.na]
# Missingness indicators of those variables and their pairwise Cramer's V.
pattern <- is.na(diabetes[, var.na])
matcram <- PairApply(pattern, CramerV)
PlotCorr(matcram)
res.aggr$missings
# Multivariate analysis of the missingness indicators
MCA(X = pattern)
aggr(x = diabetes)
# Multivariate analysis of the missingness indicators (run again)
MCA(X = pattern)
PlotCorr(x = matcram)
# Scatterplots with missing-value information in the margins.
# The original repeated the same calls several times; each plot is now
# produced once.
var.na
# One complete variable (Age) and one incomplete variable (Skin.Thick)
marginmatrix(diabetes[, c("Age", "Skin.Thick")])
boxplot(diabetes$Age ~ is.na(diabetes$Skin.Thick))
# Two incomplete variables
marginmatrix(diabetes[, c("Insulin", "Skin.Thick")])
# Every pair of variables (last column left out)
marginmatrix(diabetes[, -ncol(diabetes)], cex = .2, gap = 0)
# Matrix plot, rows sorted by the second column
matrixplot(diabetes, sortby = 2)
# MCA on a categorised copy of the data in which "missing" is a category.
diabetes.cat <- diabetes
quanti <- which(vapply(diabetes, is.numeric, logical(1)))
for (i in quanti) {
  # Quantile-based bins; the lowest break is replaced by -Inf so that the
  # minimum value falls inside the first bin.
  breaks <- c(-Inf, quantile(diabetes.cat[[i]], na.rm = TRUE)[-1])
  diabetes.cat[[i]] <- cut(diabetes.cat[[i]], breaks = breaks, labels = FALSE)
  # Bin codes become a factor; NA is kept as an explicit level when present.
  diabetes.cat[[i]] <- addNA(diabetes.cat[[i]], ifany = TRUE)
}
summary(diabetes.cat)

# Ventilation: the missing cells of Glucose and BMI are spread at random over
# the four bins. The number of draws is computed from the data instead of
# being hard-coded (the original used 5 and 11).
glucose.na <- is.na(diabetes$Glucose)
diabetes.cat$Glucose[glucose.na] <- sample(c("1", "2", "3", "4"),
                                           size = sum(glucose.na),
                                           replace = TRUE)
diabetes.cat$Glucose <- droplevels(diabetes.cat$Glucose)

bmi.na <- is.na(diabetes$BMI)
diabetes.cat$BMI[bmi.na] <- sample(c("1", "2", "3", "4"),
                                   size = sum(bmi.na),
                                   replace = TRUE)
diabetes.cat$BMI <- droplevels(diabetes.cat$BMI)
summary(diabetes.cat)

res.mca <- MCA(diabetes.cat, graph = FALSE, level.ventil = 0.04)
plot(res.mca, choix = "ind", invisible = "ind")
# Further missingness plots; repeated identical calls from the original
# have been collapsed so that each plot is drawn once.
# One complete variable (Age) and one incomplete variable (Skin.Thick)
boxplot(diabetes$Age ~ is.na(diabetes$Skin.Thick))
marginmatrix(diabetes[, c("Age", "Skin.Thick")])
# Pairs of incomplete variables
marginmatrix(diabetes[, c("Insulin", "Skin.Thick")])
marginmatrix(diabetes[, c("Glucose", "BMI")])
marginmatrix(diabetes[, c("Skin.Thick", "BMI")])
# Every pair of variables (last column left out)
marginmatrix(diabetes[, -ncol(diabetes)], cex = .2, gap = 0)
aggr(diabetes)
# Matrix plots, rows sorted by the second and then by the eighth column
matrixplot(diabetes, sortby = 2)
matrixplot(diabetes, sortby = 8)
# Alternative discretisation with funModeling, followed by an MCA.
# The package is installed only when absent and attached BEFORE its functions
# are used (the original called discretize_get_bins() before library()).
if (!requireNamespace("funModeling", quietly = TRUE)) {
  install.packages("funModeling")
}
library(funModeling)
d_bins <- discretize_get_bins(data = diabetes)
d_bins
diabetes.cat <- discretize_df(diabetes, d_bins)
summary(diabetes.cat)
res.mca <- MCA(diabetes.cat, graph = FALSE, level.ventil = 0.04)
plot(res.mca, choix = "ind", invisible = "ind")
# Row count, and the row count that corresponds to the 0.04 ventilation
# level passed to MCA() above (the mistyped `nrwo()` call was removed).
nrow(diabetes)
nrow(diabetes) * 0.04
35 / nrow(diabetes)
summary(diabetes.cat) # rare categories
# Multiple imputation with mice, then diagnostics.
?mice
# Run once: the original executed this identical 50-iteration call twice and
# kept only the second result.
res.mice <- mice(diabetes, maxit = 50, printFlag = FALSE)
tableau1 <- complete(res.mice, 1)
summary(tableau1)
plot(res.mice)
# Boxplots of the first eight columns split by Outcome, on a 3 x 3 grid.
par(mfrow = c(3, 3))
# A plain loop replaces sapply(), which was used only for its side effect
# and printed every boxplot's statistics.
for (var_name in colnames(diabetes)[1:8]) {
  boxplot(diabetes[[var_name]] ~ diabetes$Outcome,
          main = var_name,
          ylab = var_name,
          xlab = "outcome")
}
densityplot(res.mice)
res.mice$method
# Compare imputation methods for BMI by re-running mice with each candidate
# method and displaying the density plot for BMI.
# randomForest is installed (if absent) BEFORE the loop: the original ran the
# loop, installed the package, then ran the whole loop again.
# NOTE(review): presumably the "rf" method requires it - confirm for the
# installed mice version.
if (!requireNamespace("randomForest", quietly = TRUE)) {
  install.packages("randomForest")
}
# NOTE(review): densityplot() output is print()ed explicitly, which suggests
# a lattice object; lattice output is not arranged by par(mfrow) - confirm
# the intended layout.
par(mfrow = c(2, 2), mar = c(4, 4, 2, 1) + 0.1)
for (method.imp in c("pmm", "rf", "norm", "norm.boot")) {
  # Start from the methods of the reference run and change BMI only.
  method <- res.mice$method
  method["BMI"] <- method.imp
  res.mice.tmp <- mice(diabetes, method = method, maxit = 20,
                       printFlag = FALSE)
  print(densityplot(res.mice.tmp, ~ BMI, main = method.imp))
}
# Overimputation is used to assess the imputation model because BMI has few
# missing observations.
# Leave one core free, but never ask for fewer than one worker:
# detectCores() - 1 is 0 on a single-core machine and NA when the core count
# is unknown.
nnodes <- max(1, detectCores() - 1, na.rm = TRUE)

# pmm
res.mice.over <- mice.par(diabetes, m = 100, nnodes = nnodes, maxit = 20)
# plotvars = 6: NOTE(review) presumably the BMI column - confirm against
# colnames(diabetes).
res.over <- overimpute(res.mice.over,
                       plotinds = sample(seq_len(nrow(diabetes)), size = 30),
                       plotvars = 6,
                       nnodes = nnodes)
# norm
method <- res.mice$method
method["BMI"] <- "norm"
res.mice.over <- mice.par(diabetes, m = 100, nnodes = nnodes, maxit = 20,
                          method = method)
res.over <- overimpute(res.mice.over,
                       plotinds = sample(seq_len(nrow(diabetes)), size = 30),
                       plotvars = 6,
                       nnodes = nnodes)

# Same comparison with ONE shared sample of individuals, so that the three
# methods are displayed on the same rows. The original computed the pmm
# variant twice (each with m = 100) before reaching this version; only the
# final run is kept.
plotinds <- sample(seq_len(nrow(diabetes)), size = 30)
par(mfrow = c(1, 3))
# pmm
res.mice.over.pmm <- mice.par(diabetes, m = 100, nnodes = nnodes, maxit = 20)
res.over.pmm <- overimpute(res.mice.over.pmm,
                           plotinds = plotinds,
                           plotvars = 6,
                           nnodes = nnodes)
# norm
method <- res.mice$method
method["BMI"] <- "norm"
res.mice.over.norm <- mice.par(diabetes, m = 100, nnodes = nnodes, maxit = 20,
                               method = method)
res.over.norm <- overimpute(res.mice.over.norm,
                            plotinds = plotinds,
                            plotvars = 6,
                            nnodes = nnodes)
# RF
method <- res.mice$method
method["BMI"] <- "rf"
res.mice.over.rf <- mice.par(diabetes, m = 100, nnodes = nnodes, maxit = 20,
                             method = method)
res.over.rf <- overimpute(res.mice.over.rf,
                          plotinds = plotinds,
                          plotvars = 6,
                          nnodes = nnodes)
# Completed data from res.mice side by side with logical indicators of the
# originally missing cells; the indicator columns get the suffix "_imp" so
# that VIM can recognise them through `delimiter`.
don.vim <- cbind.data.frame(complete(res.mice), is.na(diabetes))
head(don.vim)
colnames(don.vim) <- c(colnames(diabetes), paste0(colnames(diabetes), "_imp"))
# Same table under a second name, as in the original session.
diabetes.vim <- cbind.data.frame(complete(res.mice), is.na(diabetes))
head(diabetes.vim)
colnames(diabetes.vim) <- c(colnames(diabetes),
                            paste0(colnames(diabetes), "_imp"))

# Repeated identical plot calls of the original are drawn once below.
# Pair Insulin-Glucose (the original comment said Age-Skin.Thick here):
# incomplete data first, then data with imputed values highlighted.
par(mfrow = c(1, 2))
marginmatrix(diabetes[, c("Insulin", "Glucose")])
marginmatrix(don.vim[, c("Insulin", "Glucose", "Insulin_imp", "Glucose_imp")],
             delimiter = "_imp")
# Pair Age-Skin.Thick
par(mfrow = c(1, 2))
marginmatrix(diabetes[, c("Age", "Skin.Thick")])
marginmatrix(don.vim[, c("Age", "Skin.Thick", "Age_imp", "Skin.Thick_imp")],
             delimiter = "_imp")

# Side example on the `sleep` data shipped with VIM: kNN imputation and a
# look at the result next to the original rows.
data(sleep, package = "VIM")
## for imputed values
x_imp <- kNN(sleep[, 1:5])
x_imp
head(sleep)
head(x_imp)

summary(diabetes)
# For every pair of variables
marginmatrix(don.vim, delimiter = "_imp")
# Logistic regression of Outcome on Skin.Thick, fitted on each imputed data set
fit <- with(
  res.mice,
  glm(Outcome ~ Skin.Thick, family = binomial)
)
length(fit[["analyses"]])
fit[["analyses"]][[1]]
# Pool the per-imputation fits
res.pool <- pool(fit)
summary(res.pool)
# Same model fitted directly on the incomplete data, for comparison
res.glm.cc <- glm(Outcome ~ Skin.Thick, family = binomial, data = diabetes)
summary(res.glm.cc)
boxplot(diabetes[["Skin.Thick"]])
# Sensitivity analysis: imputed Skin.Thick values are shifted by a constant
# delta through mice's `post` argument, one imputation run per delta.
delta <- c(0, 10, 20, 50)
imp.all <- vector("list", length(delta))
names(imp.all) <- delta
res.mice.over$post
post <- res.mice.over$post
# The loop is run once. The original executed it three times with the same
# seeds (seed = i), so every run after the first recomputed the same result.
for (i in seq_along(delta)) {
  d <- delta[i]
  # Post-processing command handed to mice as text.
  # NOTE(review): `i` and `j` inside the string are presumably evaluated in
  # mice's own sampler, not against this loop index - confirm in the mice
  # documentation of `post`.
  cmd <- paste("imp[[j]][,i] <- imp[[j]][,i] +", d)
  post["Skin.Thick"] <- cmd
  # `printFlag` is spelled out; the original abbreviated it as `print`.
  imp <- mice(diabetes, post = post, maxit = 5, seed = i, printFlag = FALSE)
  imp.all[[i]] <- imp
}
par(mfrow = c(1, 2))
bwplot(imp.all[["0"]])
bwplot(imp.all[["50"]])
boxplot(diabetes$Skin.Thick)
# Compare the unshifted run with the largest shift. The original asked for
# imp.all[["30"]], which does not exist because 30 is not one of the deltas.
densityplot(imp.all[["0"]], lwd = 3)
densityplot(imp.all[["50"]], lwd = 3)
library(mice)
library(micemd)
library(FactoMineR)
library(parallel)
library(DescTools)
library(VIM)
library(mvtnorm)
library(funModeling)
# Load the data and take a first look at it.
load("Data/diabetes.Rdata")
dim(diabetes)
summary(diabetes)
library(missMDA)
# Pairwise scatterplots (small symbols to limit overplotting).
pairsVIM(diabetes, cex = .2)
pairs(diabetes[, 1:8], cex = .2)
# Correlations between the first eight columns, computed on pairwise complete
# observations. The failed attempts of the original were removed: cor() has no
# `na.rm` argument, cor() without `use` yields NA as soon as a column has
# missing values, and PlotCorr() was handed the raw data instead of a
# correlation matrix.
cor.pairwise <- cor(diabetes[, 1:8], use = "pairwise.complete.obs")
cor.pairwise
PlotCorr(cor.pairwise)
# Multiple imputation, diagnostics, then plots of the imputed values.
?mice
res.mice <- mice(diabetes, maxit = 50, printFlag = FALSE)
tableau1 <- complete(res.mice, 1)
summary(tableau1)
plot(res.mice)
densityplot(res.mice)
# Completed data next to logical indicators of the originally missing cells;
# the indicator columns carry the suffix "_imp" used as `delimiter` below.
diabetes.vim <- cbind.data.frame(complete(res.mice), is.na(diabetes))
head(diabetes.vim)
colnames(diabetes.vim) <- c(colnames(diabetes),
                            paste0(colnames(diabetes), "_imp"))
# The plots now read `diabetes.vim`, which was just built from the current
# res.mice. The original plotted `don.vim`, an object that is not created in
# this part of the script and could only hold values from an earlier run.
# Pair Age-Skin.Thick
par(mfrow = c(1, 2))
marginmatrix(diabetes[, c("Age", "Skin.Thick")])
marginmatrix(
  diabetes.vim[, c("Age", "Skin.Thick", "Age_imp", "Skin.Thick_imp")],
  delimiter = "_imp"
)
summary(diabetes)
# Pair Insulin-Glucose
marginmatrix(diabetes[, c("Insulin", "Glucose")])
marginmatrix(
  diabetes.vim[, c("Insulin", "Glucose", "Insulin_imp", "Glucose_imp")],
  delimiter = "_imp"
)
# Overimputation with the default imputation methods.
# Leave one core free, but never ask for fewer than one worker:
# detectCores() - 1 is 0 on a single-core machine and NA when the core count
# is unknown.
nnodes <- max(1, detectCores() - 1, na.rm = TRUE)
# A single run is kept. The original first ran the same m = 100 imputation
# with a leftover `method` vector and immediately overwrote the result.
res.mice.over <- mice.par(diabetes, m = 100, nnodes = nnodes, maxit = 20)
# Random subset of 30 rows to display.
plotinds <- sample(seq_len(nrow(diabetes)), size = 30)
res.over <- overimpute(res.mice.over,
                       plotinds = plotinds,
                       nnodes = nnodes)
# Compare imputation methods for BMI, then overimpute with two of them.
# Every name here is a global read by later statements and several calls
# consume the random number generator, so the statement order is kept as is.
res.mice$method
par(mfrow = c(2, 2), mar = c(4, 4, 2, 1) + 0.1)
# First pass: four candidate methods for BMI. Each iteration starts from the
# methods of res.mice, changes BMI only, re-imputes and prints the BMI
# density plot.
for (method.imp in c("pmm", "rf", "norm", "norm.boot")){
method <- res.mice$method
method["BMI"] <- method.imp
res.mice.tmp <- mice(diabetes, method = method, maxit = 20, printFlag = FALSE)
print(densityplot(res.mice.tmp,~ BMI,main=method.imp))
}
# Overimputation is carried out
# Number of workers (all cores but one) and a random sample of 30 rows that
# both overimpute() calls below display.
nnodes <- detectCores()-1
plotinds <- sample(seq(nrow(diabetes)),size=30)
par(mfrow=c(1,3))
res.mice$method
par(mfrow = c(2, 2), mar = c(4, 4, 2, 1) + 0.1)
# Second pass: same loop as above without "pmm".
for (method.imp in c("rf", "norm", "norm.boot")){
method <- res.mice$method
method["BMI"] <- method.imp
res.mice.tmp <- mice(diabetes, method = method, maxit = 20, printFlag = FALSE)
print(densityplot(res.mice.tmp,~ BMI,main=method.imp))
}
# Overimputation is carried out
par(mfrow=c(1,2))
# norm: BMI imputed with "norm", other variables keep the methods of res.mice
method <- res.mice$method
method["BMI"] <- "norm"
res.mice.over.norm <- mice.par(diabetes, m = 100, nnodes = nnodes, maxit = 20, method = method)
# plotvars = 6: NOTE(review) presumably the BMI column - confirm against
# colnames(diabetes).
res.over.norm <- overimpute(res.mice.over.norm,
plotinds = plotinds,
plotvars = 6,
nnodes = nnodes)
# RF: same steps with BMI imputed by "rf"
method <- res.mice$method
method["BMI"] <- "rf"
res.mice.over.rf <- mice.par(diabetes, m = 100, nnodes = nnodes, maxit = 20, method = method)
res.over.rf <- overimpute(res.mice.over.rf,
plotinds = plotinds,
plotvars = 6,
nnodes = nnodes)
# Print the object returned by the last overimpute() call.
res.over.rf
# Logistic regression of Outcome on all eight predictors, fitted on each
# imputed data set and then pooled.
# Two failed attempts of the original were removed:
#  - `Outcome ~ .` inside with(): glm() gets no `data` argument there, so the
#    dot cannot be expanded; the predictors are listed explicitly instead;
#  - a `dput(colnames(diabetes)` line with an unbalanced parenthesis, which
#    made the whole file unparsable.
dput(colnames(diabetes))
fit <- with(
  res.mice,
  glm(Outcome ~ Preg + Glucose + BP + Skin.Thick + Insulin + BMI + DPF + Age,
      family = binomial)
)
length(fit$analyses)
fit$analyses[[1]]
# Pooling
res.pool <- pool(fit)
summary(res.pool)
# Same model fitted directly on the incomplete data, for comparison;
# here `.` works because `data` is supplied.
res.glm.cc <- glm(Outcome ~ ., family = binomial, data = diabetes)
summary(res.glm.cc)
colnames(diabetes)
# Chunks are neither echoed nor evaluated.
knitr::opts_chunk$set(echo = FALSE, eval = FALSE)
# load() treats a character string as a local file name, so a remote file
# must be read through an explicit url() connection, closed afterwards.
data_con <- url("https://vincentaudigier.weebly.com/uploads/1/7/3/1/17317324/diabetes.rdata")
load(data_con)
close(data_con)
