Correlation - Scatterplot Matrix

Create a matrix where the lower triangle displays scatterplots of variable pairs, the diagonal displays variable histograms and the upper triangle displays correlations of variable pairs. Also known as a scatter plot matrix or SPLOM.
Shows a matrix where the lower triangle displays scatterplots of variable pairs, the diagonal displays variable histograms and the upper triangle displays correlations of variable pairs. Also known as a scatter plot matrix or SPLOM.
How to Create a Scatterplot Matrix
- Add the object by selecting from the menu Create > Correlation > Scatterplot Matrix
- Under Inputs > Variables to plot select the variables to include in the matrix.
Examples
An example of a scatterplot matrix with 3 variables and fitted LOESS curves:
An example of a scatterplot matrix with categorical variables:
Options
The options in the Object Inspector are organized into two tabs: Inputs and Properties.
Inputs
Variables to plot The variables that will be shown in the matrix. These can be numeric, categorical or ordered categorical.
Variable names Whether to show variable names in the matrix, instead of variable labels.
Missing data Method for dealing with missing data. See Missing Data Options.
- Error if missing data
- Exclude cases with missing data
- Use partial data
Fitted line
- None No fitted line is shown in each scatterplot.
- Straight A line of best fit (from linear regression) is shown in each scatterplot.
- LOESS A line generated using fitted values from locally weighted polynomial regression is shown in each scatterplot.
Scatter point type The type of plot point to display.
- Small dots
- Dots
- Big dots
- Circles
Scatter point modifications
- None No modifications.
- Jitter Add noise to each point so that areas with closely positioned points appear denser.
- Enlarge points with multiple observations Multiple points with identical positions will appear as an enlarged point. This is the default.
Properties
This tab contains options for formatting the size of the object, as well as the underlying R code used to create the visualization, and the JavaScript code use to customize the Object Inspector itself (see Object Inspector for more details about these options). Additional options are available by editing the code.
Acknowledgements
Uses the R package weights.
Code
form.setHeading("Scatterplot Matrix");
form.dropBox({name: "formInput", label: "Variables to plot",
types: ["V:numeric, categorical, ordered categorical"],
multi: true, min_inputs: 2, height: 8});
form.checkBox({label: "Variable names", name: "formNames", default_value: false, prompt: "Display names instead of labels"});
form.comboBox({label: "Missing data",
alternatives: ["Error if missing data", "Exclude cases with missing data", "Use partial data"],
name: "formMissing", default_value: "Use partial data",
prompt: "Options for handling cases with missing data"});
form.comboBox({label: "Fitted line",
alternatives: ["None", "Straight", "LOESS"],
name: "formLine",
default_value: "None", prompt: "Draw a fitted line or curve"});
form.comboBox({label: "Scatter point type",
alternatives: ["Small dots", "Dots", "Big dots", "Circles"],
name: "formSymbol",
default_value: "Dots"})
form.comboBox({label: "Scatter point modifications",
alternatives: ["None", "Jitter", "Enlarge points with multiple observations"],
name: "formPointMods",
default_value: "Enlarge points with multiple observations",
prompt: "Options for dealing with overlapping points"})
# Random seed used for jitter
seed <- 123
# Scatterplot
panel.scatter <- function(x, y, pch = par("pch"), ...)
{
if (formPointMods == "None")
{
points(x, y, pch = pch, ...)
}
else if (formPointMods == "Jitter")
{
set.seed(seed)
jitter.x <- x + rnorm(length(x)) * (max(x, na.rm = TRUE) - min(x, na.rm = TRUE)) * 0.01
jitter.y <- y + rnorm(length(y)) * (max(y, na.rm = TRUE) - min(y, na.rm = TRUE)) * 0.01
points(jitter.x, jitter.y, pch = pch, ...)
}
else if (formPointMods == "Enlarge points with multiple observations")
{
c.hash <- hash()
x.hash <- hash()
y.hash <- hash()
for (i in seq(x))
{
key <- paste(x[i], y[i], collapse = ",")
if (has.key(key, c.hash))
c.hash[[key]] <- c.hash[[key]] + wgt[i]
else
{
c.hash[[key]] <- wgt[i]
x.hash[[key]] <- x[i]
y.hash[[key]] <- y[i]
}
}
cex <- pmax(1, 10 * sqrt(values(c.hash) / sum(wgt)))
points(values(x.hash), values(y.hash), pch = pch, cex = cex, ...)
}
else
stop(paste("Point setting not handled:", formPointMods))
if (formLine == "Straight")
{
reg <- lm(y ~ x, weights = wgt)
abline(reg, col = "red", ...)
}
else if (formLine == "LOESS")
{
ok <- is.finite(x) & is.finite(y)
if (any(ok))
{
xok <- x[ok]
yok <- y[ok]
j <- order(xok)
suppressWarnings(fitted.y <- loess(yok ~ xok, weights = wgt[ok])$fitted)
lines(xok[j], fitted.y[j], col = "red", ...)
}
}
}
# Histograms for the diagonal
panel.hist <- function(x, ...)
{
usr <- par("usr")
on.exit(par(usr))
par(usr = c(usr[1:2], 0, 1.5) )
h <- weights::wtd.hist(x, plot = FALSE, weight = wgt)
breaks <- h$breaks
nB <- length(breaks)
y <- h$counts
y <- y / max(y)
rect(breaks[-nB], 0, breaks[-1], y, col = "gray")
}
# Correlations for the upper triangle
panel.cor <- function(x, y, digits = 2, ...)
{
usr <- par("usr")
on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r <- weights::wtd.cors(x, y, weight = wgt)
txt <- FormatAsReal(r, decimals = digits)
text(0.5, 0.5, txt, cex = 1.2)
}
# Variable names/labels in the diagonal
panel.label <- function(x, y, labels, cex, font, ...)
{
text(0.5, 0.85, labels, cex = 1.2)
}
library(hash)
library(flipData)
library(flipFormat)
library(flipTransformations)
dat <- AsNumeric(ProcessQVariables(QDataFrame(formInput)), binary = FALSE)
# Changing names to labels.
if (!formNames)
names(dat) <- ExtractCommonPrefix(Labels(dat))$shortened.labels
wgt <- if (is.null(QCalibratedWeight)) {
rep(1, nrow(dat))
} else {
QCalibratedWeight
}
dat <- dat[QFilter, ]
wgt <- wgt[QFilter]
processed.data <- if (formMissing == "Error if missing data") {
ErrorIfMissingDataFound(dat)
} else if (formMissing == "Exclude cases with missing data") {
RemoveCasesWithAnyNA(dat)
} else if (formMissing == "Use partial data") {
RemoveCasesWithAllNA(dat)
} else
stop(c("Option not handled:", formMissing))
wgt <- wgt[row.names(dat) %in% rownames(processed.data)]
pch <- if (formSymbol == "Small dots") {
"."
} else if (formSymbol == "Dots") {
20
} else if (formSymbol == "Big dots") {
19
} else if (formSymbol == "Circles") {
"O"
} else
stop(paste("Symbol not found:", formSymbol))
scatterplotmatrix <- pairs(processed.data,
pch = pch,
lower.panel = panel.scatter,
diag.panel = panel.hist,
upper.panel = panel.cor,
text.panel = panel.label,
gap = 0.5)