Dimension Reduction - Correspondence Analysis of a Square Table

From Q
Jump to: navigation, search

Correspondence Analysis of Square Tables can be used to visualize tables of frequencies when the row and column labels are the same. See this blog post for more description and examples.

For more information about correspondence analysis in general, see Dimension Reduction - Correspondence Analysis of a Table.

Example

Options

Input table The name of a table containing data to be analyzed. The row and column names should match exactly. The table should only contain a single statistic (e.g., Total %). The statistic that is shown first will be used in the analysis. For example, if you have a table showing Total % and Column %, then Column % will be used (whereas Total % is the more orthodox choice).

Paste or type table As an alternative to Input table, you can instead open up a blank spreadsheet into which you can manually enter or paste a table.

Output: Either Scatterplot, Bubble Chart or Text

Bubble sizes: A numeric vector with names that are the same as the row names of the input table. If no vector is supplied then the diagonal of the input table will be used.

Supplementary A comma delimited list of labels which are not used to fit the low-dimensional space, but are plotted in the space.

Rows to ignore, Columns to ignore The names of any rows or columns to be removed from the table prior to analysis.

Horizontal dimension, Vertical dimension The dimensions to plot on the horizontal and vertical axes respectively. Since dimensions are output in order of decreasing variance, the first and second dimensions are usually of most interest.

Flip horizontally, Flip vertically Whether to reverse (i.e. invert the sign of) the output coordinates for the specified dimension(s). This may allow better visualization, especially when comparing maps that are similar apart from reflections.

Use logos for labels When this option is selected, the user can replace the labels in the scatterplot with logos. The logos should be supplied as a comma-separated list of URLs.

Maximum number of labels to plot This option limits the number of labels shown. It is useful when there are many points with overlapping labels. The remaining points will be shown without labels.

Chart title Optional title for the scatterplot or bubble chart.

Color Controls the color of the points in the scatterplot or bubble chart.

Additional options are available by editing the code.

Acknowledgements

The R package ca is used to compute the correspondence analysis.

Code

// Set the heading of the form.
form.setHeading('Correspondence Analysis of a Square Table');
// True when the file format version is greater than 10.9. The input-selection logic further down
// uses this flag to decide whether the "Input table" and "Paste or type table" controls are shown
// selectively (flag true) or both shown unconditionally (flag false).
var allow_control_groups = Q.fileFormatVersion() > 10.9; // Group controls for Displayr and later versions of Q
// Returns true when the control is missing (null or undefined), or when it has neither a
// single value (getValue) nor any entries in its list of values (getValues).
function isEmpty(x) {
    if (x == null)
        return true;
    // A non-null single value means the control has content; the list is not consulted.
    if (x.getValue() != null)
        return false;
    var selected = x.getValues();
    return selected == null || selected.length == 0;
}
// Returns true when the control's value is null/undefined or has a length of zero.
function isBlankSheet(x) {
    var contents = x.getValue();
    if (contents == null)
        return true;
    return contents.length == 0;
}

// Controls are accumulated here in display order and passed to form.setInputControls
// at the end of the script.
var controls = [];
if (!form.dataEntry)
{
    // form.dataEntry is not available: the table drop box is the only way to supply data.
    var tableInput = form.dropBox({label: "Input table", types:["table", "RItem"], name: "formTableToAnalyse",
                                   multi: false, prompt: "Select the table to be analyzed"});
    controls.push(tableInput);
}
else
{
    // Either a table can be selected or data can be pasted, so the drop box is optional here.
    // (The original literal listed `multi` twice; only the final `multi: false` took effect,
    // so the dead `multi: true` entry has been removed.)
    var tableInput = form.dropBox({label: "Input table", types:["table", "RItem"], name: "formTableToAnalyse", multi: false,
                                   required: false, prompt: "Select the table to be analyzed"});
    var pasteInput = form.dataEntry({name: "formEnteredData", label: "Paste or type table", prompt: "Opens a spreadsheet into which you can paste data."});
    // When control groups are not allowed, both inputs are always shown.
    // Otherwise an input is hidden once only the other input has content:
    // both are shown while both are empty, or while both have content.
    if (!allow_control_groups || isBlankSheet(pasteInput) || !isEmpty(tableInput))
        controls.push(tableInput);
    if (!allow_control_groups || isEmpty(tableInput) || !isBlankSheet(pasteInput))
        controls.push(pasteInput);
}
// Output type selector. Its current value determines which of the later controls are created.
// The option key was previously misspelled "propmpt", so the prompt text was never applied.
var outOpt = form.comboBox({label: "Output", alternatives:["Scatterplot", "Bubble Chart", "Text"], name: "formOutput",  multi : false,
                            default_value: "Scatterplot", prompt: "The type of output to be produced"});
controls.push(outOpt);

// Bubble-specific inputs are only created when the "Bubble Chart" output is selected.
if (outOpt.getValue() == "Bubble Chart")
{
    // Optional source for the bubble sizes.
    var bubbleSizeBox = form.dropBox({
        label: "Bubble sizes",
        types: ["table", "RItem"],
        name: "formBubbleSizes",
        multi: false,
        required: false,
        prompt: "Sizes of the bubbles of the row points, labelled with the row labels. Leave blank to use diagonals of the input matrix"
    });
    // Optional title for the bubble-size legend.
    var legendTitleBox = form.textBox({
        label: "Bubble legend title",
        type: "text",
        default_value: "",
        name: "formLegendTitle",
        required: false,
        prompt: "The title of the legend showing the bubble sizes"
    });
    controls.push(bubbleSizeBox, legendTitleBox);
}



// Free-text lists of labels: points to treat as supplementary, and rows/columns to drop.
// All three are optional and are created for every output type.
var ignoreDefault = "NET, Total, SUM";
var supplementaryBox = form.textBox({
    label: "Supplementary",
    name: "formSupplementary",
    required: false,
    prompt: "Comma-delimited list of labels to be excluded from fitting but are plotted"
});
var ignoreRowsBox = form.textBox({
    label: "Rows to ignore",
    type: "text",
    default_value: ignoreDefault,
    name: "formIgnoreRows",
    required: false,
    prompt: "Comma-delimited list of rows to be excluded"
});
var ignoreColumnsBox = form.textBox({
    label: "Columns to ignore",
    type: "text",
    default_value: ignoreDefault,
    name: "formIgnoreColumns",
    required: false,
    prompt: "Comma-delimited list of columns to be excluded"
});
controls.push(supplementaryBox, ignoreRowsBox, ignoreColumnsBox);
 
// Plot options: only created when the selected output is one of the two chart types.
if (["Scatterplot", "Bubble Chart"].indexOf(outOpt.getValue()) != -1)
{
    // Which dimensions go on which axis.
    var dim1 = form.numericUpDown({label: "Horizontal dimension", name: "formDim1", default_value:1,
                                   prompt: "The dimension to be plotted horizontally"});
    controls.push(dim1);
    var dim2 = form.numericUpDown({label: "Vertical dimension", name: "formDim2", default_value:2,
                                   prompt: "The dimension to be plotted vertically"});
    controls.push(dim2);
    // Axis reflections.
    var mirror1 = form.checkBox({label: "Flip horizontally", name:"formMirrorDim1",
                                 default_value: false, prompt: "Reverse the points along the horizontal axis"});
    controls.push(mirror1);
    // (The original literal specified default_value twice; the duplicate has been removed.)
    var mirror2 = form.checkBox({label: "Flip vertically", name:"formMirrorDim2",
                                 default_value: false, prompt: "Reverse the points along the vertical axis"});
    controls.push(mirror2);
    var maxLab = form.numericUpDown({label: "Maximum number of labels to plot", name: "formMaxLab", default_value:20,
                                     prompt: "The maximum number of labels to show"});
    controls.push(maxLab);
    // Logos are only offered for the scatterplot; the URL and size inputs appear once the box is ticked.
    if (outOpt.getValue() == "Scatterplot")
    {
        var logoOpt = form.checkBox({label: "Use logos for labels", name:"formUseLogo", default_value: false, prompt: "Replace the text labels with logos"});
        controls.push(logoOpt);
        if (logoOpt.getValue())
        {
            // type is spelled "text" to match every other text box in this form (was "Text").
            var logoUrl = form.textBox({name: "formLogos", label: "Logos", prompt: "Enter URLs as a comma separated list", type: "text", required: true});
            controls.push(logoUrl);
            var logoSize = form.numericUpDown({name: "formLogoSize", label: "Logo size", default_value: 0.5, increment: 0.1});
            controls.push(logoSize);
        }
    }
    var title = form.textBox({label: "Chart title", type: "text", default_value: "Correspondence analysis", name: "formTitle", required: false});
    controls.push(title);
    var color = form.colorPicker({label: "Color", name: "rowColor", default_value:"#5B9BD5", prompt: "The color of the points"});
    controls.push(color);
}
// Font-size and gridline settings, created only for the two chart outputs.
var chosenOutput = outOpt.getValue();
if (chosenOutput === "Scatterplot" || chosenOutput === "Bubble Chart")
{
    // One numeric up/down control per entry, created and added in this order.
    var fontSizeSpecs = [
        {name: "formTitleFontSize", label: "Title font size", default_value: 20},
        {name: "formXTitleFontSize", label: "X-axis title font size", default_value: 16},
        {name: "formYTitleFontSize", label: "Y-axis title font size", default_value: 16},
        {name: "formLabelsFontSize", label: "Labels font size", default_value: 14},
        {name: "formAxisFontSize", label: "Axis labels font size", default_value: 10},
        {name: "formLegendFontSize", label: "Legend font size", default_value: 15}
    ];
    for (var specIndex = 0; specIndex < fontSizeSpecs.length; specIndex++)
        controls.push(form.numericUpDown(fontSizeSpecs[specIndex]));
    controls.push(form.checkBox({label: "Show gridlines", name: "formShowGridlines", default_value: true}));
}
// Hand the assembled list of controls to the form.
form.setInputControls(controls);
# Runs a correspondence analysis of a square table using the values of the form controls.
library(flipDimensionReduction)

# Use the table chosen in the "Input table" drop box; when it is NULL or absent, fall back
# to the data typed or pasted into the "Paste or type table" spreadsheet.
x <- get0("formTableToAnalyse")
if (is.null(x))
    x <- flipTransformations::ParseEnteredData(formEnteredData, want.data.frame = TRUE, want.col.names = TRUE, want.row.names = TRUE)
# Remove the rows and columns named in the "Rows to ignore" / "Columns to ignore" boxes.
x <- flipTables::TidyTabularData(x, row.names.to.remove = formIgnoreRows, col.names.to.remove = formIgnoreColumns)

# Several controls are only created for particular outputs (e.g. the chart title, color, logo
# and bubble-legend controls). The original code guarded some of the corresponding variables
# with exists()/get0() but read others directly; every optional variable is now looked up with
# get0(), falling back to the default value declared for its control in the form.
# NOTE(review): assumes a variable may be undefined when its control is not shown, as the
# pre-existing exists() checks imply - confirm against the host application.
ca.sq <- CorrespondenceAnalysis(x, square = TRUE,
    output = formOutput,
    supplementary = formSupplementary,
    mirror.horizontal = get0("formMirrorDim1", ifnotfound = FALSE),
    mirror.vertical = get0("formMirrorDim2", ifnotfound = FALSE),
    chart.title = get0("formTitle", ifnotfound = "Correspondence analysis"),
    # Logos are only used for the scatterplot, and only when the checkbox is ticked.
    logos = if (formOutput == "Scatterplot" && isTRUE(get0("formUseLogo"))) formLogos else NULL,
    logo.size = get0("formLogoSize", ifnotfound = 0.5),
    # With no bubble sizes supplied, the diagonal of the input table is used.
    bubble.size = if (!is.null(get0("formBubbleSizes"))) formBubbleSizes else diag(x),
    bubble.title = get0("formLegendTitle", ifnotfound = ""),
    title.font.size = get0("formTitleFontSize", ifnotfound = 0),
    x.title.font.size = get0("formXTitleFontSize", ifnotfound = 0),
    y.title.font.size = get0("formYTitleFontSize", ifnotfound = 0),
    labels.font.size = get0("formLabelsFontSize", ifnotfound = 0),
    axis.font.size = get0("formAxisFontSize", ifnotfound = 0),
    legend.font.size = get0("formLegendFontSize", ifnotfound = 0),
    show.gridlines = get0("formShowGridlines", ifnotfound = FALSE),
    row.color = get0("rowColor", ifnotfound = "#5B9BD5"),
    dim1.plot = get0("formDim1"),
    dim2.plot = get0("formDim2"),
    max.row.labels.plot = get0("formMaxLab", ifnotfound = 200))