Plot Pairwise Plots for Imputed and True Data
Source:R/approach_vaeac.R
plot_vaeac_imputed_ggpairs.Rd
A function that creates a matrix of plots (GGally::ggpairs()) of imputations generated from the unconditioned distribution \(p(\boldsymbol{x})\) estimated by a vaeac model, and then compares the imputed values with data from the true distribution (if provided).
See ggpairs for an introduction to GGally::ggpairs(), and the corresponding vignette.
Usage
plot_vaeac_imputed_ggpairs(
explanation,
which_vaeac_model = "best",
x_true = NULL,
add_title = TRUE,
alpha = 0.5,
upper_cont = c("cor", "points", "smooth", "smooth_loess", "density", "blank"),
upper_cat = c("count", "cross", "ratio", "facetbar", "blank"),
upper_mix = c("box", "box_no_facet", "dot", "dot_no_facet", "facethist",
"facetdensity", "denstrip", "blank"),
lower_cont = c("points", "smooth", "smooth_loess", "density", "cor", "blank"),
lower_cat = c("facetbar", "ratio", "count", "cross", "blank"),
lower_mix = c("facetdensity", "box", "box_no_facet", "dot", "dot_no_facet",
"facethist", "denstrip", "blank"),
diag_cont = c("densityDiag", "barDiag", "blankDiag"),
diag_cat = c("barDiag", "blankDiag"),
cor_method = c("pearson", "kendall", "spearman")
)
Arguments
- explanation
Shapr list. The output list from the explain() function.
- which_vaeac_model
String indicating which vaeac model to use when generating the samples. The options 'best', 'best_running', and 'last' are always available. All possible options can be obtained by calling names(explanation$internal$parameters$vaeac$models).
- x_true
Data.table containing the data from the distribution that the vaeac model is fitted to.
- add_title
Logical. If TRUE, then a title is added to the plot based on the internal description of the vaeac model specified in which_vaeac_model.
- alpha
Numeric between 0 and 1 (default is 0.5). The degree of color transparency.
- upper_cont
String. Type of plot to use in the upper triangle for continuous features, see GGally::ggpairs(). Possible options are: 'cor' (default), 'points', 'smooth', 'smooth_loess', 'density', and 'blank'.
- upper_cat
String. Type of plot to use in the upper triangle for categorical features, see GGally::ggpairs(). Possible options are: 'count' (default), 'cross', 'ratio', 'facetbar', and 'blank'.
- upper_mix
String. Type of plot to use in the upper triangle for mixed features, see GGally::ggpairs(). Possible options are: 'box' (default), 'box_no_facet', 'dot', 'dot_no_facet', 'facethist', 'facetdensity', 'denstrip', and 'blank'.
- lower_cont
String. Type of plot to use in the lower triangle for continuous features, see GGally::ggpairs(). Possible options are: 'points' (default), 'smooth', 'smooth_loess', 'density', 'cor', and 'blank'.
- lower_cat
String. Type of plot to use in the lower triangle for categorical features, see GGally::ggpairs(). Possible options are: 'facetbar' (default), 'ratio', 'count', 'cross', and 'blank'.
- lower_mix
String. Type of plot to use in the lower triangle for mixed features, see GGally::ggpairs(). Possible options are: 'facetdensity' (default), 'box', 'box_no_facet', 'dot', 'dot_no_facet', 'facethist', 'denstrip', and 'blank'.
- diag_cont
String. Type of plot to use on the diagonal for continuous features, see GGally::ggpairs(). Possible options are: 'densityDiag' (default), 'barDiag', and 'blankDiag'.
- diag_cat
String. Type of plot to use on the diagonal for categorical features, see GGally::ggpairs(). Possible options are: 'barDiag' (default) and 'blankDiag'.
- cor_method
String. Type of correlation measure, see GGally::ggpairs(). Possible options are: 'pearson' (default), 'kendall', and 'spearman'.
Value
A GGally::ggpairs() figure.
Examples
if (FALSE) { # \dontrun{
library(xgboost)
library(data.table)
library(shapr)
data("airquality")
data <- data.table::as.data.table(airquality)
data <- data[complete.cases(data), ]
x_var <- c("Solar.R", "Wind", "Temp", "Month")
y_var <- "Ozone"
ind_x_explain <- 1:6
x_train <- data[-ind_x_explain, ..x_var]
y_train <- data[-ind_x_explain, get(y_var)]
x_explain <- data[ind_x_explain, ..x_var]
# Fitting a basic xgboost model to the training data
model <- xgboost(
data = as.matrix(x_train),
label = y_train,
nround = 100,
verbose = FALSE
)
explanation <- explain(
model = model,
x_explain = x_explain,
x_train = x_train,
approach = "vaeac",
phi0 = mean(y_train),
n_MC_samples = 1,
vaeac.epochs = 10,
vaeac.n_vaeacs_initialize = 1
)
# Plot the results
figure <- plot_vaeac_imputed_ggpairs(
explanation = explanation,
which_vaeac_model = "best",
x_true = x_train,
add_title = TRUE
)
figure
# Note that this is a ggplot2 object which we can alter, e.g., we can change the colors.
figure +
ggplot2::scale_color_manual(values = c("#E69F00", "#999999")) +
ggplot2::scale_fill_manual(values = c("#E69F00", "#999999"))
} # }