Calculation - Average Each Row - Table(s)

From Q
Jump to navigation Jump to search
This page is currently under construction, or it refers to features which are under development and not yet available for use.
This page is under construction. Its contents are only visible to developers!

This page contains the Standard R code for Calculation - Average Each Row, and can be used to compute the average of each row in a table.

Example

Consider the following table containing some statistics, such a per capita income and illiteracy rates, from a past census for states in the northeast United States.


To compute the average value for all states in the region for each statistic, select the input table and run this feature from the toolbar via Automate > Browse Online Library > Calculation > Average Each Row. The output is

Options

Input The table to be used in the calculation.

Calculate for inputs with incomplete data If this option is checked, than any missing values the input will be ignored in the calculation. If unchecked, then missing values are not removed before calculation and will propagate as missing values in the output.

Rows to include This listbox shows all row labels in the input. The calculation will not performed for any rows whose labels are unselected here and they will not be shown in the output. By default the "SUM" and "NET" rows and columns are not shown in the output.

Columns to include As above, but for columns; any column labels unselected here will not be included in the output.

Code

const UNCHECK_NAMES = ["SUM", "NET", "TOTAL"];
const MULTI_QUESTIONTYPES = ["Text - Multi", "Pick One - Multi",
                             "Pick Any - Compact",
                             "Pick Any - Grid", "Number - Grid"];
const ALLOWED_R_CLASSES = ["NULL", "numeric", "integer", "logical", "factor", "matrix", "array", "data.frame", "table"];

function getInputNames(input, dim = 0){
    var input_names;
    var listbox_names = {};
    let input_type = input.type;
    if (input_type === "R Output") {
        try {
            var output_class = input.outputClasses;
            if (output_class.includes("array") || output_class.includes("matrix")) {
                var dimnames = input.data.getAttribute([], "dimnames");
                if (dim < dimnames.length && dimnames[dim] != null)
                    input_names = dimnames[dim];
                else
                    input_names = [];
            } else if (output_class.includes("data.frame")) {
                if (dim === 1)
                    input_names = input.data.getAttribute([], "names");
                else {
                    let row_names = input.data.getAttribute([], "row.names");
                    input_names = typeof(row_names[0]) === "string" ? row_names : [];
                }
            } else {
                input_names = dim === 0 ? input.data.getAttribute([], "names") : [];
            }
        }catch(e) {
            input_names = [];
        }
        listbox_names["names"] = input_names;
        listbox_names["initial"] = filterSingleNames(input_names);
    } else {
        let primary_type = input.primary.variableSetStructure;
        let has_multi_or_grid = primary_type.endsWith("Multi") || primary_type.endsWith("Grid");
        let has_columns = !!input.secondary || has_multi_or_grid || input.cellStatistics.length > 1;
        listbox_names = {names: ["foo"], initial: has_columns ? ["bar"] : []};//getTableDimNames(input, dim);
    }
    // DS-3147: replace newline chars/any whitespace with single space
    if (listbox_names["names"].length > 0) {
        Object.keys(listbox_names).map(key => {
            listbox_names[key] = listbox_names[key].map(str => typeof(str) === "string" ? str.replace(/\s+/g, " ") : str);
        });
    }
    return listbox_names;
}

function getTableDimNames(table, dim)
{
    let has_primary = table.primary != null;
    let table_output_names = {"names": [], "initial": []};
    if (has_primary)
    {
        let table_output = table.calculateOutput();
        let is_crosstab_or_multi_or_raw = table.secondary.type === "Question"
	|| MULTI_QUESTIONTYPES.includes(table.primary.questionType)
	|| table.secondary === "RAW DATA";
        if (table.primary.isBanner && table.secondary === "SUMMARY")
            is_crosstab_or_multi_or_raw = false;
        if (dim === 0)
        {
            let row_names = table_output.rowLabels;
            let row_spans = table_output.rowSpans;
            let row_indices = table_output.rowIndices(include_nets_sums = false);
            if (row_spans.length > 1)
            {
                table_output_names = flattenSpanNames(row_names, row_spans);
            } else
            {
                let initial = !!row_indices ? row_names.filter((name, i) => row_indices.includes(i)) : filterSingleNames(row_names);
                table_output_names = {"names": row_names, "initial": initial};
            }
        }
        if (dim === 1)
        {
            let n_columns = table_output.numberColumns;
            let col_spans = n_columns < 2 ? [] : table_output.columnSpans;
            let col_indices = table_output.columnIndices(include_nets_sums = false);
            let col_names = [];
            if (col_spans.length > 1)
            {
                col_names = table_output.columnLabels;
                table_output_names = flattenSpanNames(col_names, col_spans);
            } else
            {
                col_names = is_crosstab_or_multi_or_raw ? table_output.columnLabels : table_output.statistics;
                let initial = !!col_indices ? col_names.filter((name, i) => col_indices.includes(i)) : filterSingleNames(col_names);
                table_output_names = {"names": col_names, "initial": initial};
            }
        }
    }
    return table_output_names;
}

function filterSingleNames(names)
{
    return names.filter(n => !UNCHECK_NAMES.includes(n));
}

function flattenSpanNames(labels, span_names)
{
    let span_length = span_names.length;
    let span_labels = labels;
    let unselect_labels = span_names.filter(span => UNCHECK_NAMES.includes(span["label"]));
    let unselect_span_indices = [];
    if(unselect_labels.length > 0)
    {
        unselect_span_indices = unselect_labels.map(unselect => unselect["indices"]);
        unselect_span_indices = [].concat.apply([], unselect_span_indices);
        unselect_span_indices = uniq(unselect_span_indices);
    }
    let unselected_base_indices = labels.map((l, i) => UNCHECK_NAMES.includes(l) ? i : "").filter(Number);
    let unselected_indices = [].concat.apply([], [unselect_span_indices, unselected_base_indices]);
    unselected_indices = uniq(unselected_indices)
    labels.forEach((item, i) => {
        for (j = 0; j < span_length; j++)
        {
            let curr_span = span_names[j];
            if (curr_span["indices"].includes(i))
            {
                span_labels[i] = span_names[j]["label"] + " - " + span_labels[i];
            }
        }
    });
    let initial_values = span_labels.filter((label, i) => !unselected_indices.includes(i));
    return {"names": span_labels, "initial": initial_values};
}

function recursiveGetItemByGuid(group_item, guid) {
    var cur_sub_items = group_item.subItems;
    for (var j = 0; j < cur_sub_items.length; j++)
    {
        if (cur_sub_items[j].type == "ReportGroup") {
            var res = recursiveGetItemByGuid(cur_sub_items[j], guid);
            if (res != null)
                return(res)
        }
        else if (cur_sub_items[j].guid == guid)
            return(cur_sub_items[j]);
    }
    return null;
}

let user_input = form.dropBox({name: "formInput", label: "Input",
                               types: ["table", "RItem:" + ALLOWED_R_CLASSES.join(", ")],
                               prompt: "Input data with rows to sum, e.g. a Table, R matrix"}).getValue();
form.comboBox({name: 'formRemoveMissing',
               alternatives: ['Yes (show warning)', 'No', 'Yes'],
               label: 'Calculate for rows with incomplete data',
               prompt: 'If set to \'Yes\', any missing values are removed from the data before the calculation occurs. ' +
                       'If set to \'No\', rows with any missing values will be assigned a missing value. ' +
                       'Rows whose values are entirely missing, will always be assigned a missing value ' +
                       'regardless of this setting.',
               default_value: 'Yes (show warning)'});
let row_names = {"names": [], "initial": []};
let col_names = {"names": [], "initial": []};

if (!!user_input)
{
    var input = recursiveGetItemByGuid(project.report, user_input.guid);
    row_names = getInputNames(input, 0);
    col_names = getInputNames(input, 1);
}

function uniq(a) {
    var seen = {};
    return a.filter(function(item) {
        return seen.hasOwnProperty(item) ? false : (seen[item] = true);
    });
}

form.textBox({name: "formIncludeRows", label: "Rows to exclude", prompt: "Select the row labels to be excluded in the output table.", default_value: "NET; SUM", required: false});
form.textBox({name: "formIncludeColumns", label: "Columns to exclude", prompt: "Select the columns labels to be excluded in the output table.", default_value: "NET; SUM", required: false});

form.setHeading("Average Each Row");
library(verbs)

removal.choices <- list(formIncludeRows, formIncludeColumns)
categories.to.remove <- ParseCategoriesToRemove(removal.choices, list(formInput))
remove.rows    <- categories.to.remove[[1L]]
remove.columns <- categories.to.remove[[2L]]
remove.missing <- startsWith(formRemoveMissing, "Yes")
warn <- if (endsWith(formRemoveMissing, "(show warning)")) TRUE else "MuffleMissingValueWarning"

average.each.row <- AverageEachRow(QInputs(formInput),
                                   remove.missing = remove.missing,
                                   remove.rows = remove.rows,
                                   remove.columns = remove.columns,
                                   warn = warn)