Calculation - Sum Each Row - Table(s)

From Q
Jump to navigation Jump to search
This page is currently under construction, or it refers to features which are under development and not yet available for use.
This page is under construction. Its contents are only visible to developers!

This page contains the Standard R code for Calculation - Sum Each Row, and can be used to compute the sum for each row in a table.

Example

Consider the following input table, a crosstab showing the respondent preferences for various cola brands by age.

To compute the total number of respondents that prefer each brand of cola, we can use this feature to obtain the following output.

Options

Input The table to be used in the calculation.

Calculate for inputs with incomplete data If this option is checked, then any missing values in the input will be ignored in the calculation. If unchecked, then missing values are not removed before calculation and will propagate as missing values in the output.

Rows to include This listbox shows all row labels in the input. The calculation will not be performed for any rows whose labels are unselected here, and they will not be shown in the output. By default the "SUM" and "NET" rows and columns are not shown in the output.

Columns to include As above, but for columns; any column labels unselected here will not be included in the output.

Technical Details

When Calculate for inputs with incomplete data is checked and the input to the calculation consists entirely of missing values, then the returned output value is set to missing data. Prior to 5th October, 2021 the returned output value was different in this situation. For example, consider the input into the calculation with the three values.

A B C
NaN NaN NaN

Then applying Sum Each Row to this input returned the value of zero before 5th October, 2021. After this date, the Sum Each Row function returns the output value of NaN.

Code

// Aggregate labels that are left out of the "initial" (pre-selected) label lists.
const UNCHECK_NAMES = [
    "SUM",
    "NET",
    "TOTAL"
];

// Question types checked by getTableDimNames when deciding whether a table's
// columns are described by column labels or by statistic names.
const MULTI_QUESTIONTYPES = [
    "Text - Multi",
    "Pick One - Multi",
    "Pick Any - Compact",
    "Pick Any - Grid",
    "Number - Grid"
];

// R classes accepted by the "Input" drop box; joined into its "RItem:" type filter.
const ALLOWED_R_CLASSES = [
    "NULL",
    "numeric",
    "integer",
    "logical",
    "factor",
    "matrix",
    "array",
    "data.frame",
    "table"
];

/**
 * Collect the labels of one dimension of the selected input.
 *
 * For an "R Output" item the labels are read from the R attributes of the
 * data ("dimnames" for arrays/matrices, "names"/"row.names" for data frames,
 * "names" otherwise). Any failure while reading them results in an empty list.
 *
 * For any other input type the function currently returns placeholder values
 * ("foo"/"bar"); the call to getTableDimNames(input, dim) is disabled.
 *
 * @param input  The report item chosen in the drop box.
 * @param dim    0 for rows (default), 1 for columns.
 * @returns {{names: Array, initial: Array}} All labels, and the subset that
 *          filterSingleNames keeps (for R outputs). String labels have every
 *          run of whitespace collapsed to a single space.
 */
function getInputNames(input, dim = 0){
    let input_names;
    let listbox_names = {};
    let input_type = input.type;
    if (input_type === "R Output") {
        try {
            let output_class = input.outputClasses;
            if (output_class.includes("array") || output_class.includes("matrix")) {
                let dimnames = input.data.getAttribute([], "dimnames");
                if (dim < dimnames.length && dimnames[dim] != null)
                    input_names = dimnames[dim];
                else
                    input_names = [];
            } else if (output_class.includes("data.frame")) {
                if (dim === 1)
                    input_names = input.data.getAttribute([], "names");
                else {
                    let row_names = input.data.getAttribute([], "row.names");
                    // Only string row names are treated as labels.
                    input_names = typeof(row_names[0]) === "string" ? row_names : [];
                }
            } else {
                // Any other class only has labels along the first dimension.
                input_names = dim === 0 ? input.data.getAttribute([], "names") : [];
            }
        } catch(e) {
            // Best effort: an input whose attributes cannot be read has no labels.
            input_names = [];
        }
        // An attribute lookup that yields nothing (e.g. an unnamed vector) must not
        // reach filterSingleNames()/.length below, which run outside the try block.
        if (input_names == null)
            input_names = [];
        listbox_names["names"] = input_names;
        listbox_names["initial"] = filterSingleNames(input_names);
    } else {
        let primary_type = input.primary.variableSetStructure;
        let has_multi_or_grid = primary_type.endsWith("Multi") || primary_type.endsWith("Grid");
        let has_columns = !!input.secondary || has_multi_or_grid || input.cellStatistics.length > 1;
        // NOTE(review): placeholder values; the real lookup, getTableDimNames(input, dim),
        // is not called here.
        listbox_names = {names: ["foo"], initial: has_columns ? ["bar"] : []};
    }
    // DS-3147: replace newline chars/any whitespace with single space
    if (listbox_names["names"].length > 0) {
        Object.keys(listbox_names).forEach(key => {
            listbox_names[key] = listbox_names[key].map(str => typeof(str) === "string" ? str.replace(/\s+/g, " ") : str);
        });
    }
    return listbox_names;
}

/**
 * Collect the row (dim 0) or column (dim 1) labels of a table from its
 * calculated output.
 *
 * When the requested dimension has more than one span, labels are flattened
 * with flattenSpanNames. Otherwise "initial" is the subset of labels whose
 * positions are reported by rowIndices(false)/columnIndices(false), falling
 * back to filterSingleNames when no indices are returned.
 *
 * @param table  Table item; nothing is computed when table.primary is null.
 * @param dim    0 for rows, 1 for columns; other values yield empty lists.
 * @returns {{names: Array, initial: Array}}
 */
function getTableDimNames(table, dim)
{
    let table_output_names = {"names": [], "initial": []};
    if (table.primary == null)
        return table_output_names;

    let table_output = table.calculateOutput();
    let secondary = table.secondary;
    // secondary may be absent, a string such as "SUMMARY"/"RAW DATA", or an object
    // with a type; guard the property access so a missing secondary does not throw.
    let is_crosstab_or_multi_or_raw = (secondary != null && secondary.type === "Question")
        || MULTI_QUESTIONTYPES.includes(table.primary.questionType)
        || secondary === "RAW DATA";
    if (table.primary.isBanner && secondary === "SUMMARY")
        is_crosstab_or_multi_or_raw = false;

    if (dim === 0)
    {
        let row_names = table_output.rowLabels;
        let row_spans = table_output.rowSpans;
        // The argument is the include_nets_sums flag. It is passed positionally:
        // JavaScript has no keyword arguments, and `name = value` inside a call
        // would assign to an undeclared global instead.
        let row_indices = table_output.rowIndices(false);
        if (row_spans.length > 1)
        {
            table_output_names = flattenSpanNames(row_names, row_spans);
        } else
        {
            let initial = !!row_indices ? row_names.filter((name, i) => row_indices.includes(i)) : filterSingleNames(row_names);
            table_output_names = {"names": row_names, "initial": initial};
        }
    }
    if (dim === 1)
    {
        let n_columns = table_output.numberColumns;
        // Column spans are only consulted when there are at least two columns.
        let col_spans = n_columns < 2 ? [] : table_output.columnSpans;
        let col_indices = table_output.columnIndices(false);
        let col_names = [];
        if (col_spans.length > 1)
        {
            col_names = table_output.columnLabels;
            table_output_names = flattenSpanNames(col_names, col_spans);
        } else
        {
            // Tables that are not crosstabs/multis/raw data use statistic names as columns.
            col_names = is_crosstab_or_multi_or_raw ? table_output.columnLabels : table_output.statistics;
            let initial = !!col_indices ? col_names.filter((name, i) => col_indices.includes(i)) : filterSingleNames(col_names);
            table_output_names = {"names": col_names, "initial": initial};
        }
    }
    return table_output_names;
}
/**
 * Drop the default-unchecked aggregate labels (those listed in UNCHECK_NAMES).
 *
 * @param names  Array of labels.
 * @returns A new array holding the labels not found in UNCHECK_NAMES, in order.
 */
function filterSingleNames(names)
{
    const kept = [];
    names.forEach(function (label) {
        if (UNCHECK_NAMES.indexOf(label) === -1)
            kept.push(label);
    });
    return kept;
}

/**
 * Prefix each label with the labels of the spans that cover it and work out
 * which flattened labels should be selected initially.
 *
 * A label is left out of "initial" when it is itself one of UNCHECK_NAMES or
 * when it sits under a span whose label is one of UNCHECK_NAMES.
 *
 * @param labels      Array of base labels; it is not modified.
 * @param span_names  Array of spans, each with a "label" and an "indices"
 *                    array of the label positions it covers.
 * @returns {{names: Array, initial: Array}} Flattened labels ("span - label")
 *          and the subset selected by default.
 */
function flattenSpanNames(labels, span_names)
{
    // Positions to leave unselected. A Set keeps them unique and, unlike a
    // truthiness filter, does not lose position 0.
    let unselected_indices = new Set();
    span_names.forEach(span => {
        if (UNCHECK_NAMES.includes(span["label"]))
            span["indices"].forEach(index => unselected_indices.add(index));
    });
    labels.forEach((label, i) => {
        if (UNCHECK_NAMES.includes(label))
            unselected_indices.add(i);
    });

    // Build the flattened labels in a new array so the caller's labels stay intact.
    // Spans are applied in order, each one prepending its label, so later spans
    // end up outermost: "span[n] - ... - span[0] - label".
    let span_labels = labels.map((label, i) => {
        let flattened = label;
        span_names.forEach(span => {
            if (span["indices"].includes(i))
                flattened = span["label"] + " - " + flattened;
        });
        return flattened;
    });

    let initial_values = span_labels.filter((label, i) => !unselected_indices.has(i));
    return {"names": span_labels, "initial": initial_values};
}

/**
 * Depth-first search of a report group for the item with the given guid.
 *
 * Sub-items of type "ReportGroup" are searched recursively and are never
 * returned themselves; every other sub-item is compared by guid.
 *
 * @param group_item  Item exposing a subItems array.
 * @param guid        Identifier to look for.
 * @returns The first matching item, or null when none is found.
 */
function recursiveGetItemByGuid(group_item, guid) {
    const children = group_item.subItems;
    for (let idx = 0; idx < children.length; idx += 1) {
        const child = children[idx];
        if (child.type == "ReportGroup") {
            // Descend into the nested group; keep scanning siblings if it has no match.
            const found = recursiveGetItemByGuid(child, guid);
            if (found != null)
                return found;
            continue;
        }
        if (child.guid == guid)
            return child;
    }
    return null;
}

// Input selector: accepts a table or an R item of one of the allowed classes.
let user_input = form.dropBox({name: "formInput", label: "Input",
                               types: ["table", "RItem:" + ALLOWED_R_CLASSES.join(", ")],
                               prompt: "Input data with rows to sum, e.g. a Table, R matrix"}).getValue();
// Missing-data handling; the R code reads this control as formRemoveMissing.
form.comboBox({name: 'formRemoveMissing',
               alternatives: ['Yes (show warning)', 'No', 'Yes'],
               label: 'Calculate for rows with incomplete data',
               prompt: 'If set to \'Yes\', any missing values are removed from the data before the calculation occurs. ' +
                       'If set to \'No\', rows with any missing values will be assigned a missing value. ' +
                       'Rows whose values are entirely missing, will always be assigned a missing value ' +
                       'regardless of this setting.',
               default_value: 'Yes (show warning)'});
// Label lists for both dimensions. Both are declared up front so that neither
// becomes an implicit global and both have a value when no input is selected.
let row_names = {"names": [], "initial": []};
let col_names = {"names": [], "initial": []};

if (!!user_input)
{
    var input = recursiveGetItemByGuid(project.report, user_input.guid);
    // The lookup returns null when the item is not found in the report tree;
    // keep the empty defaults in that case.
    if (input != null)
    {
        row_names = getInputNames(input, 0);
        col_names = getInputNames(input, 1);
    }
}

/**
 * Remove duplicates from an array, keeping the first occurrence of each item.
 *
 * Items are compared by their string form, so 1 and "1" count as the same
 * item. A Set is used for bookkeeping rather than a plain object so that
 * items such as "hasOwnProperty" or "__proto__" are handled like any other.
 *
 * @param a  Array to deduplicate; it is not modified.
 * @returns A new array with duplicates removed.
 */
function uniq(a) {
    var seen = new Set();
    return a.filter(function(item) {
        // Symbols cannot be stringified implicitly as keys; compare them by identity.
        var key = typeof item === "symbol" ? item : String(item);
        if (seen.has(key))
            return false;
        seen.add(key);
        return true;
    });
}

// Free-text lists of labels to drop from the output. The control names say
// "Include", but the R code treats their contents as labels to remove.
form.textBox({
    name: "formIncludeRows",
    label: "Rows to exclude",
    prompt: "Select the row labels to be excluded in the output table.",
    default_value: "NET; SUM",
    required: false
});
form.textBox({
    name: "formIncludeColumns",
    label: "Columns to exclude",
    prompt: "Select the columns labels to be excluded in the output table.",
    default_value: "NET; SUM",
    required: false
});

form.setHeading("Sum Each Row");
library(verbs)

## Text entered in the "Rows to exclude" / "Columns to exclude" boxes.
## The boxes are optional, so treat a missing control as an empty string;
## strsplit() and the comparisons below all require a character value.
include.rows <- get0("formIncludeRows", ifnotfound = NULL)
include.cols <- get0("formIncludeColumns", ifnotfound = NULL)
if (is.null(include.rows))
    include.rows <- ""
if (is.null(include.cols))
    include.cols <- ""

## Labels are separated by semicolons if any are present, otherwise by commas
sep <- if (grepl(";", include.rows, fixed = TRUE)) ";" else ","
remove.rows    <- trimws(strsplit(include.rows, sep)[[1]])
sep <- if (grepl(";", include.cols, fixed = TRUE)) ";" else ","
remove.columns <- trimws(strsplit(include.cols, sep)[[1]])
input <- formInput
input <- verbs:::checkInputsAtMost2DOrQTable(list(input))[[1]]

if (!is.null(dimnames(input))){
    all.rownames <- dimnames(input)[[1]]
}else
    all.rownames <- names(input)
## Handle edge where user wants to remove one label and it contains a comma
if (trimws(include.rows) %in% all.rownames)
    remove.rows <- trimws(include.rows)
## NULL when the input has fewer than two dimensions
all.colnames <- if (length(dim(input)) > 1) dimnames(input)[[2]]
if (trimws(include.cols) %in% all.colnames)
    remove.columns <- trimws(include.cols)

unmatched.rows <- which(!remove.rows %in% all.rownames)
unmatched.cols <- which(!remove.columns %in% all.colnames)
has.row.spans <- NCOL(attr(input, "span")$rows) > 1
## Don't warn if user hasn't changed from defaults, unless table has been flattened
if (length(unmatched.rows) > 0 && (has.row.spans || include.rows != "NET; SUM"))
{
    bad.labels <- paste0("'", paste(remove.rows[unmatched.rows], collapse = "', '"), "'")
    msg <- ngettext(length(unmatched.rows),
                    paste0("The following row label requested to be excluded was ",
                           "not found in the input data: ", bad.labels, "."),
                    paste0("The following row labels specified to be excluded were ",
                           "not found in the input data: ", bad.labels, "."))
    ## head() copes with fewer than three (including zero) available labels
    good.labels <- head(all.rownames, 3)
    msg <- paste0(msg, " Please supply labels such as '", paste(good.labels, collapse = "'; '"), "'.")
    warning(msg)
}

has.col.spans <- NCOL(attr(input, "span")$columns) > 1
## Don't warn if user hasn't changed from defaults, unless table has been flattened
if (!is.null(all.colnames) && length(unmatched.cols) > 0 &&
    (has.col.spans || include.cols != "NET; SUM"))
{
    bad.labels <- paste0("'", paste(remove.columns[unmatched.cols], collapse = "', '"), "'")
    ## Singular/plural wording depends on the number of unmatched *column* labels
    msg <- ngettext(length(unmatched.cols),
                    paste0("The following column label requested to be excluded was ",
                           "not found in the input data: ", bad.labels, "."),
                    paste0("The following column labels specified to be excluded were ",
                           "not found in the input data: ", bad.labels, "."))
    good.labels <- head(all.colnames, 3)
    msg <- paste0(msg, " Please supply labels such as '", paste(good.labels, collapse = "'; '"), "'.")
    warning(msg)
}
## "Yes" and "Yes (show warning)" both remove missing values; only the latter warns
remove.missing <- startsWith(formRemoveMissing, "Yes")
warn <- if (endsWith(formRemoveMissing, "(show warning)")) TRUE else "MuffleMissingValueWarning"

sum.each.row <- SumEachRow(QInputs(formInput),
                           remove.missing = remove.missing,
                           remove.rows = remove.rows,
                           remove.columns = remove.columns,
                           warn = warn)