Calculation - Sum Each Column - Table(s)

From Q
Jump to navigation Jump to search
This page is currently under construction, or it refers to features which are under development and not yet available for use.
This page is under construction. Its contents are only visible to developers!

This page contains the Standard R code for Calculation - Sum Each Column, and can be used to compute the sum for each column in a table.

Example

Consider the following example showing vote totals by state in the 2020 U.S. presidential election.

After selecting the input table and running this feature, the output is

Options

Input The table to be used in the calculation.

Calculate for inputs with incomplete data If this option is checked, than any missing values the input will be ignored in the calculation. If unchecked, then missing values are not removed before calculation and will propagate as missing values in the output.

Rows to include This listbox shows all row labels in the input. The calculation will not performed for any rows whose labels are unselected here and they will not be shown in the output. By default the "SUM" and "NET" rows and columns are not shown in the output.

Columns to include As above, but for columns; any column labels unselected here will not be included in the output.

Technical Details

When Calculate for inputs with incomplete data is checked and the input to the calculation consists entirely of missing values, then the returned output value is set to missing data. Prior to 5th October, 2021 the returned output value was different in this situation. For example, consider the input into the calculation with the three values.

A B C
NaN NaN NaN

Then applying Sum Each Column to this input returned the value of zero for each element before 5th October, 2021. After this date, the Sum Each Column function returns the output value of NaN for each element.

Sum Each Column
A B C
before 5th October, 2021 0 0 0
after 5th October, 2021 NaN NaN NaN

Code

const UNCHECK_NAMES = ["SUM", "NET", "TOTAL"];
const MULTI_QUESTIONTYPES = ["Text - Multi", "Pick One - Multi",
                             "Pick Any - Compact",
                             "Pick Any - Grid", "Number - Grid"];
const ALLOWED_R_CLASSES = ["NULL", "numeric", "integer", "logical", "factor", "matrix", "array", "data.frame", "table"];

function getInputNames(input, dim = 0){
    var input_names;
    var listbox_names = {};
    let input_type = input.type;
    if (input_type === "R Output") {
        try {
            var output_class = input.outputClasses;
            if (output_class.includes("array") || output_class.includes("matrix")) {
                var dimnames = input.data.getAttribute([], "dimnames");
                if (dim < dimnames.length && dimnames[dim] != null)
                    input_names = dimnames[dim];
                else
                    input_names = [];
            } else if (output_class.includes("data.frame")) {
                if (dim === 1)
                    input_names = input.data.getAttribute([], "names");
                else {
                    let row_names = input.data.getAttribute([], "row.names");
                    input_names = typeof(row_names[0]) === "string" ? row_names : [];
                }
            } else {
                input_names = dim === 0 ? input.data.getAttribute([], "names") : [];
            }
        }catch(e) {
            input_names = [];
        }
        listbox_names["names"] = input_names;
        listbox_names["initial"] = filterSingleNames(input_names);
    } else {
        let primary_type = input.primary.variableSetStructure;
        let has_multi_or_grid = primary_type.endsWith("Multi") || primary_type.endsWith("Grid");
        let has_columns = !!input.secondary || has_multi_or_grid || input.cellStatistics.length > 1;
        listbox_names = {names: ["foo"], initial: has_columns ? ["bar"] : []};//getTableDimNames(input, dim);
    }
    // DS-3147: replace newline chars/any whitespace with single space
    if (listbox_names["names"].length > 0) {
        Object.keys(listbox_names).map(key => {
            listbox_names[key] = listbox_names[key].map(str => typeof(str) === "string" ? str.replace(/\s+/g, " ") : str);
        });
    }
    return listbox_names;
}

function getTableDimNames(table, dim)
{
    let has_primary = table.primary != null;
    let table_output_names = {"names": [], "initial": []};
    if (has_primary)
    {
        let table_output = table.calculateOutput();
        let is_crosstab_or_multi_or_raw = table.secondary.type === "Question"
	|| MULTI_QUESTIONTYPES.includes(table.primary.questionType)
	|| table.secondary === "RAW DATA";
        if (table.primary.isBanner && table.secondary === "SUMMARY")
            is_crosstab_or_multi_or_raw = false;
        if (dim === 0)
        {
            let row_names = table_output.rowLabels;
            let row_spans = table_output.rowSpans;
            let row_indices = table_output.rowIndices(include_nets_sums = false);
            if (row_spans.length > 1)
            {
                table_output_names = flattenSpanNames(row_names, row_spans);
            } else
            {
                let initial = !!row_indices ? row_names.filter((name, i) => row_indices.includes(i)) : filterSingleNames(row_names);
                table_output_names = {"names": row_names, "initial": initial};
            }
        }
        if (dim === 1)
        {
            let n_columns = table_output.numberColumns;
            let col_spans = n_columns < 2 ? [] : table_output.columnSpans;
            let col_indices = table_output.columnIndices(include_nets_sums = false);
            let col_names = [];
            if (col_spans.length > 1)
            {
                col_names = table_output.columnLabels;
                table_output_names = flattenSpanNames(col_names, col_spans);
            } else
            {
                col_names = is_crosstab_or_multi_or_raw ? table_output.columnLabels : table_output.statistics;
                let initial = !!col_indices ? col_names.filter((name, i) => col_indices.includes(i)) : filterSingleNames(col_names);
                table_output_names = {"names": col_names, "initial": initial};
            }
        }
    }
    return table_output_names;
}
function filterSingleNames(names)
{
    return names.filter(n => !UNCHECK_NAMES.includes(n));
}

function flattenSpanNames(labels, span_names)
{
    let span_length = span_names.length;
    let span_labels = labels;
    let unselect_labels = span_names.filter(span => UNCHECK_NAMES.includes(span["label"]));
    let unselect_span_indices = [];
    if(unselect_labels.length > 0)
    {
        unselect_span_indices = unselect_labels.map(unselect => unselect["indices"]);
        unselect_span_indices = [].concat.apply([], unselect_span_indices);
        unselect_span_indices = uniq(unselect_span_indices);
    }
    let unselected_base_indices = labels.map((l, i) => UNCHECK_NAMES.includes(l) ? i : "").filter(Number);
    let unselected_indices = [].concat.apply([], [unselect_span_indices, unselected_base_indices]);
    unselected_indices = uniq(unselected_indices)
    labels.forEach((item, i) => {
        for (j = 0; j < span_length; j++)
        {
            let curr_span = span_names[j];
            if (curr_span["indices"].includes(i))
            {
                span_labels[i] = span_names[j]["label"] + " - " + span_labels[i];
            }
        }
    });
    let initial_values = span_labels.filter((label, i) => !unselected_indices.includes(i));
    return {"names": span_labels, "initial": initial_values};
}

function recursiveGetItemByGuid(group_item, guid) {
    var cur_sub_items = group_item.subItems;
    for (var j = 0; j < cur_sub_items.length; j++)
    {
        if (cur_sub_items[j].type == "ReportGroup") {
            var res = recursiveGetItemByGuid(cur_sub_items[j], guid);
            if (res != null)
                return(res)
        }
        else if (cur_sub_items[j].guid == guid)
            return(cur_sub_items[j]);
    }
    return null;
}

let user_input = form.dropBox({name: "formInput", label: "Input",
                               types: ["table", "RItem: " + ALLOWED_R_CLASSES.join(", ")],
                               prompt: "Input data with columns to sum, e.g. a Table, R matrix"}).getValue();
form.comboBox({name: 'formRemoveMissing',
               alternatives: ['Yes (show warning)', 'No', 'Yes'],
               label: 'Calculate for columns with incomplete data',
               prompt: 'If set to \'Yes\', any missing values are removed from the data before the calculation occurs. ' +
                       'If set to \'No\', columns with any missing values will be assigned a missing value. ' +
                       'Columns whose values are entirely missing, will always be assigned a missing value ' +
                       'regardless of this setting.',
               default_value: 'Yes (show warning)'});
let col_names = {"names": [], "initial": []};

if (!!user_input)
{
    var input = recursiveGetItemByGuid(project.report, user_input.guid);
    row_names = getInputNames(input, 0);
    col_names = getInputNames(input, 1);
}

function uniq(a) {
    var seen = {};
    return a.filter(function(item) {
        return seen.hasOwnProperty(item) ? false : (seen[item] = true);
    });
}

form.textBox({name: "formIncludeRows", label: "Rows to exclude", prompt: "Select the row labels to be excluded in the output table.", default_value: "NET; SUM", required: false});
form.textBox({name: "formIncludeColumns", label: "Columns to exclude", prompt: "Select the columns labels to be excluded in the output table.", default_value: "NET; SUM", required: false});

form.setHeading("Sum Each Column");
library(verbs)

include.rows <- get0("formIncludeRows", ifnotfound = NULL)
include.cols <- get0("formIncludeColumns", ifnotfound = NULL)

sep <- ifelse(grepl(";", include.rows), ";", ",")
remove.rows    <- trimws(strsplit(include.rows, sep)[[1]])
sep <- ifelse(grepl(";", include.cols), ";", ",")
remove.columns <- trimws(strsplit(include.cols, sep)[[1]])
input <- formInput
input <- verbs:::checkInputsAtMost2DOrQTable(list(input))[[1]]

if (!is.null(dimnames(input))){
    all.rownames <- dimnames(input)[[1]]
}else
    all.rownames <- names(input)
## Handle edge where user wants to remove one label and it contains a comma
if (trimws(include.rows) %in% all.rownames)
    remove.rows <- trimws(include.rows)
all.colnames <- if (length(dim(input)) > 1) dimnames(input)[[2]]
if (trimws(include.cols) %in% all.colnames)
    remove.columns <- trimws(include.cols)

unmatched.rows <- which(!remove.rows %in% all.rownames)
unmatched.cols <- which(!remove.columns %in% all.colnames)
has.row.spans <- NCOL(attr(input, "span")$rows) > 1
## Don't warn if user hasn't changed from defaults, unless table has been flattened
if (length(unmatched.rows) > 0 && (has.row.spans || include.rows != "NET; SUM"))
{
    bad.labels <- paste0("'", paste(remove.rows[unmatched.rows], collapse = "', '"), "'")
    msg <- ngettext(length(unmatched.rows),
                    paste0("The following row label requested to be excluded was ",
                           "not found in the input data: ", bad.labels, "."),
                    paste0("The following row labels specified to be excluded were ",
                           "not found in the input data: ", bad.labels, "."))
    good.labels <- all.rownames[1:min(3, length(all.rownames))]
    msg <- paste0(msg, " Please supply labels such as '", paste(good.labels, collapse = "', '"), "'.")
    warning(msg)
}

has.col.spans <- NCOL(attr(input, "span")$columns) > 1
## Don't warn if user hasn't changed from defaults, unless table has been flattened
if (!is.null(all.colnames) && length(unmatched.cols) > 0 &&
    (has.col.spans || include.cols != "NET; SUM"))
{
    bad.labels <- paste0("'", paste(remove.columns[unmatched.cols], collapse = "', '"), "'")
    msg <- ngettext(length(unmatched.rows),
                    paste0("The following column label requested to be excluded was ",
                           "not found in the input data: ", bad.labels, "."),
                    paste0("The following column labels specified to be excluded were ",
                           "not found in the input data: ", bad.labels, "."))
    good.labels <- all.colnames[1:min(3, length(all.colnames))]
    msg <- paste0(msg, " Please supply labels such as '", paste(good.labels, collapse = "', '"), "'.")
    warning(msg)
}
remove.missing <- startsWith(formRemoveMissing, "Yes")
warn <- if (endsWith(formRemoveMissing, "(show warning)")) TRUE else "MuffleMissingValueWarning"

sum.each.column <- SumEachColumn(QInputs(formInput),
                                 subset = ValidateFilterForEachColumnVariants(QInputs(formInput), QFilter),
                                 weights = QPopulationWeight,
                                 remove.missing = remove.missing,
                                 remove.rows = remove.rows,
                                 remove.columns = remove.columns,
                                 warn = warn)