Plot Pairwise Plots for Imputed and True Data — vaeac_plot_imputed_ggpairs

A function that creates a matrix of plots (GGally::ggpairs()) from generated imputations from the unconditioned distribution $p(\boldsymbol{x})$ estimated by a vaeac model, and then compares the imputed values with data from the true distribution (if provided). See ggpairs for an introduction to GGally::ggpairs(), and the corresponding vignette.

vaeac_plot_imputed_ggpairs(
  explanation,
  which_vaeac_model = "best",
  x_true = NULL,
  add_title = TRUE,
  alpha = 0.5,
  upper_cont = c("cor", "points", "smooth", "smooth_loess", "density", "blank"),
  upper_cat = c("count", "cross", "ratio", "facetbar", "blank"),
  upper_mix = c("box", "box_no_facet", "dot", "dot_no_facet", "facethist",
    "facetdensity", "denstrip", "blank"),
  lower_cont = c("points", "smooth", "smooth_loess", "density", "cor", "blank"),
  lower_cat = c("facetbar", "ratio", "count", "cross", "blank"),
  lower_mix = c("facetdensity", "box", "box_no_facet", "dot", "dot_no_facet",
    "facethist", "denstrip", "blank"),
  diag_cont = c("densityDiag", "barDiag", "blankDiag"),
  diag_cat = c("barDiag", "blankDiag"),
  cor_method = c("pearson", "kendall", "spearman")
)

Arguments

explanation: Shapr list. The output list from the explain() function.
which_vaeac_model: String. Indicating which vaeac model to use when generating the samples. Possible options are always 'best', 'best_running', and 'last'. All possible options can be obtained by calling names(explanation$internal$parameters$vaeac$models).
x_true: Data.table containing the data from the distribution that the vaeac model is fitted to.
add_title: Logical. If TRUE, then a title is added to the plot based on the internal description of the vaeac model specified in which_vaeac_model.
alpha: Numeric between 0 and 1 (default is 0.5). The degree of color transparency.
upper_cont: String. Type of plot to use in upper triangle for continuous features, see GGally::ggpairs(). Possible options are: 'cor' (default), 'points', 'smooth', 'smooth_loess', 'density', and 'blank'.
upper_cat: String. Type of plot to use in upper triangle for categorical features, see GGally::ggpairs(). Possible options are: 'count' (default), 'cross', 'ratio', 'facetbar', and 'blank'.
upper_mix: String. Type of plot to use in upper triangle for mixed features, see GGally::ggpairs(). Possible options are: 'box' (default), 'box_no_facet', 'dot', 'dot_no_facet', 'facethist', 'facetdensity', 'denstrip', and 'blank'.
lower_cont: String. Type of plot to use in lower triangle for continuous features, see GGally::ggpairs(). Possible options are: 'points' (default), 'smooth', 'smooth_loess', 'density', 'cor', and 'blank'.
lower_cat: String. Type of plot to use in lower triangle for categorical features, see GGally::ggpairs(). Possible options are: 'facetbar' (default), 'ratio', 'count', 'cross', and 'blank'.
lower_mix: String. Type of plot to use in lower triangle for mixed features, see GGally::ggpairs(). Possible options are: 'facetdensity' (default), 'box', 'box_no_facet', 'dot', 'dot_no_facet', 'facethist', 'denstrip', and 'blank'.
diag_cont: String. Type of plot to use on the diagonal for continuous features, see GGally::ggpairs(). Possible options are: 'densityDiag' (default), 'barDiag', and 'blankDiag'.
diag_cat: String. Type of plot to use on the diagonal for categorical features, see GGally::ggpairs(). Possible options are: 'barDiag' (default) and 'blankDiag'.
cor_method: String. Type of correlation measure, see GGally::ggpairs(). Possible options are: 'pearson' (default), 'kendall', and 'spearman'.

Value

A GGally::ggpairs() figure.

Author

Lars Henry Berge Olsen

Examples

if (FALSE) { # \dontrun{
# Example: fit an xgboost model on the airquality data, explain it with the
# "vaeac" approach, and plot the imputed values against the training data.
library(xgboost)
library(data.table)
library(shapr)

# Load the data and keep only the rows without missing values
data("airquality")
data <- data.table::as.data.table(airquality)
data <- data[complete.cases(data), ]

# Feature columns and response column
x_var <- c("Solar.R", "Wind", "Temp", "Month")
y_var <- "Ozone"

# The first six rows are the observations to explain; the rest is training data
ind_x_explain <- 1:6
x_train <- data[-ind_x_explain, ..x_var]
y_train <- data[-ind_x_explain, get(y_var)]
x_explain <- data[ind_x_explain, ..x_var]

# Fitting a basic xgboost model to the training data.
# The argument is spelled out as `nrounds` instead of relying on partial
# argument matching of `nround`.
model <- xgboost(
  data = as.matrix(x_train),
  label = y_train,
  nrounds = 100,
  verbose = FALSE
)

# Explain the predictions using the vaeac approach. The settings are small
# (1 Monte Carlo sample, 10 epochs, 1 initialized vaeac), presumably to keep
# the example fast.
explanation <- explain(
  model = model,
  x_explain = x_explain,
  x_train = x_train,
  approach = "vaeac",
  phi0 = mean(y_train),
  n_MC_samples = 1,
  vaeac.epochs = 10,
  vaeac.n_vaeacs_initialize = 1
)

# Plot the results, passing the training data as the true data
figure <- vaeac_plot_imputed_ggpairs(
  explanation = explanation,
  which_vaeac_model = "best",
  x_true = x_train,
  add_title = TRUE
)
figure

# Note that this is a ggplot2 object which we can alter, e.g., we can change the colors.
figure +
  ggplot2::scale_color_manual(values = c("#E69F00", "#999999")) +
  ggplot2::scale_fill_manual(values = c("#E69F00", "#999999"))
} # }