Create New Variables - Binary Variable(s)

From Q
Jump to navigation Jump to search

This tool creates new binary variables from existing data. Binary variables are ones which can take two values, 1 and 0, with the possibility of a missing value for cases that do not have data. For numeric data, the new binary variables correspond to cases which have a value greater than 0, and you can customize this to other threshold values as needed (see below). For categorical data, the new binary variables correspond to the categories in the data. Additionally, when applying this tool to multiple categorical variable sets, the new binary variables will indicate which cases match those categories in any of the selected data. This is useful when wanting to combine categorical data of different types, for example if wanting to combine responses from a Top of mind awareness question (single response) with those from an Other awareness question (multiple response) to form a measure of Total awareness.

Usage

  1. Select the variables or questions that you want to use in the Variables and Questions tab. These should be all numeric (Number, Number - Multi, Number - Grid) or all categorical (Pick One, Pick One - Multi, Pick Any, Pick Any - Compact).
  2. Run this tool using Automate > Browse Online Library > Create New Variables > Binary Variables.
  3. If using categorical questions, choose whether to return values for incomplete data (see below).
  4. Enter a name for the new data and click OK.

If you have chosen numeric data, you can modify the formula used to determine which values in the underlying data are mapped to a value of 1 in the new binary variables using the following steps:

  1. Find the new question in the Variables and Questions tab.
  2. Right-click and select Edit R Variable.
  3. Modify the formula as needed.
  4. Run the code and click Update R Variable.
  1. Select the variables that you want to use under Data Sets. These should be all numeric (Numeric, Numeric - Multi, Numeric - Grid) or all categorical (Nominal/Ordinal, Nominal/Ordinal - Multi, Binary - Multi, Binary - Multi (Compact)).
  2. Run this tool by clicking the plus-sign next to the variables and selecting Ready-Made New Variables > Binary Variables.
  3. If using categorical questions, choose whether to return values for incomplete data (see below).

If you have chosen numeric data, you can modify the formula used to determine which values in the underlying data are mapped to a value of 1 in the new binary variables using the following steps:

  1. Find the new binary data under Data Sets.
  2. Expand the variable set and select one of its variables.
  3. Modify the code in Properties > R CODE on the right of the screen.
  4. Click the Calculate button.

Technical details

When the inputs to the binary variables are categorical, you are offered the choice to Compute for cases with incomplete data. If you click Yes, then the new variables will always have a 0 or 1 value unless all of the input data for that category for that case are missing. If you click No, then each category will have a missing value when that case in the data has any missing values among the selected input data.

How to apply this QScript

  • Start typing the name of the QScript into the Search features and data box in the top right of the Q window.
  • Click on the QScript when it appears in the QScripts and Rules section of the search results.

OR

  • Select Automate > Browse Online Library.
  • Select this QScript from the list.

Customizing the QScript

This QScript is written in JavaScript and can be customized by copying and modifying the JavaScript.

Customizing QScripts in Q4.11 and more recent versions

  • Start typing the name of the QScript into the Search features and data box in the top right of the Q window.
  • Hover your mouse over the QScript when it appears in the QScripts and Rules section of the search results.
  • Press Edit a Copy (bottom-left corner of the preview).
  • Modify the JavaScript (see QScripts for more detail on this).
  • Either:
    • Run the QScript, by pressing the blue triangle button.
    • Save the QScript and run it at a later time, using Automate > Run QScript (Macro) from File.

Customizing QScripts in older versions

  • Copy the JavaScript shown on this page.
  • Create a new text file, giving it a file extension of .QScript. See here for more information about how to do this.
  • Modify the JavaScript (see QScripts for more detail on this).
  • Run the file using Automate > Run QScript (Macro) from File.

JavaScript

includeWeb("QScript Utility Functions");
includeWeb("QScript Selection Functions");
includeWeb("QScript Functions to Generate Outputs");
includeWeb("QScript R Output Functions");
includeWeb("QScript R Variable Creation Functions");

// Reports whether `value` occurs in `arr`, using the strict-equality
// matching performed by `indexOf`.
function isInArray(value, arr) {
  const position = arr.indexOf(value);
  return position !== -1;
}


// Picks the text used to describe a variable in generated names:
//  - the parent question's name when the parent's variableSetStructure
//    contains "- Multi" or "- Grid";
//  - otherwise the variable's label, falling back to the variable's name
//    when the label is empty.
function getVariableOrQuestionLabel(variable) {
    const parent = variable.question;
    const is_multi_or_grid = /- Multi|- Grid/.test(parent.variableSetStructure);
    if (is_multi_or_grid) {
        return parent.name;
    }
    return variable.label.length > 0 ? variable.label : variable.name;
}

/**
 * Creates a new R-based question whose variables are TRUE where the
 * corresponding input value is greater than 0.
 *
 * The generated R code assigns the input to `x` (single variable) or to a
 * `variable.set` data.frame (several variables), then evaluates
 * `binary.var = <input> > 0`. The new question is typed "Pick Any - Grid"
 * when the inputs form a grid and "Pick Any" otherwise, is named after the
 * inputs with a " > 0" suffix, and its variables are renamed sequentially
 * with the "binary" prefix.
 *
 * @param {Array} questions - The selected numeric questions / variable sets.
 * @returns {boolean|undefined} `false` when the new question could not be
 *     created (the reason is logged); otherwise nothing.
 */
function numericQuestionsToBinary(questions) {
    const is_displayr = inDisplayr();
    const variables = getVariablesFromQuestions(questions);
    const data_file = variables[0].question.dataFile;
    const suitable_for_grid = suitableForGrid(questions);
    // Logical (not bitwise) operators so that make_grid is a real boolean.
    const make_grid = (questions.length > 1 && suitable_for_grid) ||
        (questions.length === 1 && suitable_for_grid && questions[0].questionType === "Number - Grid");

    let variable_labels = variables.map(function(v) {
        return v.label;
    });
    const duplicate_variable_labels = variable_labels.some(function(x) {
        return variable_labels.indexOf(x) !== variable_labels.lastIndexOf(x);
    });

    // Prefix labels with the question name when they would otherwise be
    // ambiguous (grid output, or the same label appearing more than once).
    if (make_grid || duplicate_variable_labels) {
        variable_labels = variables.map(function(v, v_ind) {
            return v.question.name + " - " + variable_labels[v_ind];
        });
    }

    const base_question_name = preventDuplicateQuestionName(data_file, variable_labels.filter(onlyUnique).join(" + "));
    const r_variable_name = variables.length === 1 ? "x" : "variable.set";
    const last_variable = getLastVariable(variables);
    const temp_var_name = randomVariableName(16); // temporary name, random to (almost) guarantee uniqueness
    const variable_names = variables.map(function(v) {
        return checkDuplicateVariable(v.name) ? generateDisambiguatedVariableName(v) : stringToRName(v.name);
    });

    // Simple assignment if single variable, otherwise data.frame
    let expression;
    if (variables.length === 1) {
        expression = r_variable_name + ' <- ' + variable_names[0] + '\n';
    } else {
        // One `label = variable` argument per input variable.
        const df_assignments = variables.map(function(v, index) {
            return stringToRName(variable_labels[index]) + " = " + variable_names[index];
        });
        const def_prefix = r_variable_name + ' <- data.frame(';
        // Align continuation lines under the first data.frame argument.
        const white_spaces = " ".repeat(def_prefix.length);
        expression = def_prefix + df_assignments.join(",\n" + white_spaces) + ',\n' + white_spaces + 'check.names = FALSE)\n';
    }

    expression += "binary.var = " + r_variable_name + " > 0\n" +
              "# If you wish to change the cut-off for the count (from > 0), modify the code above\n" +
              "# E.g. To count values larger than 50, change > 0 to > 50\n" +
              "# E.g. To count values smaller than or equal to 25, change > 0 to <= 25\n" +
              "binary.var";

    checkFileHasMoreThanOneCase(data_file);
    let question;
    try {
        question = robustNewRQuestion(data_file, expression, base_question_name, temp_var_name, last_variable);
        question.questionType = make_grid ? "Pick Any - Grid" : "Pick Any";
        question.name = preventDuplicateQuestionName(data_file, variables.map(function(x) {
            return getVariableOrQuestionLabel(x);
        }).filter(onlyUnique).join(" + ") + " > 0");
        question.needsCheckValuesToCount = false;
        insertAtHoverButtonIfShown(question);
    } catch (e) {
        const structure_name = getVariableNaming(is_displayr);
        log("The binary transform could not be computed for this " + structure_name + ": " + e);
        return false;
    }
    // Replace temporary variable names
    nameSequentialVariables(question.variables, "binary");
    reportNewRQuestion(question, "Binary transformed question");
}

/**
 * Creates a new "Pick Any" R question that combines the selected categorical
 * questions into binary variables via flipData's CombineVariableSetsAsBinary.
 *
 * The generated R code records how many distinct categories the inputs had
 * when the script ran and stops with an error if the number of columns
 * produced later differs from that count.
 *
 * @param {Array} selected_questions - The categorical questions / variable
 *     sets to combine.
 * @param {boolean} compute_for_incomplete - Passed to R as
 *     `compute.for.incomplete` (TRUE when truthy, FALSE otherwise).
 * @returns {undefined} Failures while creating the question are logged
 *     rather than thrown.
 */
categoricalQuestionsToBinary = function(selected_questions, compute_for_incomplete) {
    const data_file = selected_questions[0].dataFile;
    checkFileHasMoreThanOneCase(data_file);

    // Union of the category labels across all selected questions; only its
    // size is used, as the expected number of output columns.
    const all_labels = getAllCategories(selected_questions);

    const r_names = selected_questions.map(x => generateDisambiguatedQuestionName(x));

    const q_name = `Binary variables from: ${selected_questions.map(q => q.name).join(" + ")}`;

    const r_expr = `
library(flipData)
n.categories <- ${all_labels.length}
binary.data <- CombineVariableSetsAsBinary(${r_names.join(", ")}, compute.for.incomplete = ${compute_for_incomplete ? "TRUE" : "FALSE"})
if (ncol(binary.data) != n.categories) {
    stop("The number of categories in the input data has changed. You should re-run the Binary Variables option again.")
}
binary.data
    `;

    try {
        const new_q = robustNewRQuestion(data_file, r_expr, preventDuplicateQuestionName(data_file, q_name), "binary" + makeid(), null);
        new_q.questionType = "Pick Any";
        new_q.needsCheckValuesToCount = false;
        insertAtHoverButtonIfShown(new_q);
        reportNewRQuestion(new_q, "Binary transformed question");
    } catch (e) {
        let message = 'Binary variables could not be created for the selected variables. ';
        // Thrown values are not guaranteed to be Error objects; fall back to
        // the value itself so the log never ends in "undefined".
        message += (e && e.message) ? e.message : e;
        log(message);
    }
};


// Collects the distinct category labels found across `questions`, in order
// of first appearance.
//  - Question types starting with "Pick One": labels come from
//    getRFactorLevelsFromQuestionOrVariable(q).labels.
//  - Question types starting with "Pick Any": labels are the data-reduction
//    row labels, excluding rows whose index is listed in netRows.
// Any other question type raises a UserError.
getAllCategories = function(questions) {
    const labels_by_question = questions.map(function (q) {
        const type = q.questionType;
        if (type.indexOf("Pick One") === 0) {
            return getRFactorLevelsFromQuestionOrVariable(q).labels;
        }
        if (type.indexOf("Pick Any") === 0) {
            const reduction = q.dataReduction;
            const net_rows = reduction.netRows;
            return reduction.rowLabels.filter(function (row_label, position) {
                return net_rows.indexOf(position) === -1;
            });
        }
        throw new UserError( `${(inDisplayr() ? q.variableSetStructure : q.questionType)} data is not supported.`);
    });

    // Flatten while dropping labels that have already been seen.
    return labels_by_question.reduce(function (combined, question_labels) {
        question_labels.forEach(function (label) {
            if (combined.indexOf(label) < 0) {
                combined.push(label);
            }
        });
        return combined;
    }, []);
};

// Returns the plural noun used in messages for the selected data:
// "variable sets" when `is_displayr` is truthy, "questions" otherwise.
getVariableNaming = function(is_displayr) {
    if (is_displayr) {
        return "variable sets";
    }
    return "questions";
};

// Decides whether the questions can be laid out as a grid: every question
// after the first must have the same number of variables as the first, and
// its variable labels must satisfy arraysEqual against the first's labels.
// With zero or one question there is nothing to compare, so the result is true.
suitableForGrid = function(questions) {
    const label_sets = questions.map(function(q) {
        return q.variables.map(function(v) { return v.label; });
    });
    const reference = label_sets[0];
    const has_mismatch = label_sets.slice(1).some(function(candidate) {
        return candidate.length !== reference.length || !arraysEqual(candidate, reference);
    });
    return !has_mismatch;
};


/**
 * Entry point of the tool: asks the user for input questions and creates
 * binary variables from them.
 *
 * The selection must be either all numeric, or made up only of
 * Nominal / Ordinal / Binary structures. Numeric selections are handled by
 * numericQuestionsToBinary; categorical ones by categoricalQuestionsToBinary,
 * after asking (when more than one question is selected) whether to compute
 * values for cases with incomplete data.
 *
 * @returns {boolean|undefined} `false` when no questions were selected;
 *     otherwise nothing.
 */
function binaryVariables() {
    const is_displayr = inDisplayr();
    const allowed_types = ["Nominal",
                           "Nominal - Multi",
                           "Numeric",
                           "Numeric - Multi",
                           "Numeric - Grid",
                           "Ordinal",
                           "Ordinal - Multi",
                           "Binary - Multi",
                           "Binary - Multi (Compact)"];

    const selected_questions = selectInputQuestions(allowed_types);
    if (!selected_questions)
        return false;
    // Return value unused. NOTE(review): call kept in case the helper also
    // validates that the selection comes from a single data file - confirm.
    getDataFileFromQuestions(selected_questions);

    const variable_set_structures = selected_questions.map(function(x) { return x.variableSetStructure; });

    const all_numeric = variable_set_structures.every(x => /Numeric/.test(x));
    const binary_categorical_mix = variable_set_structures.every(x => /(Nominal|Ordinal|Binary)/.test(x));

    if (binary_categorical_mix && Q.fileFormatVersion() < 22.12) {
        log("This feature requires a new version of Q. Please contact support@q-researchsoftware.com");
        return;
    }

    if (!(all_numeric || binary_categorical_mix)) {
        log(correctTerminology(`Cannot create Binary Variables from this selection of ${is_displayr ? "variable set Structures" : "Question Types"}. Select variables which are all Numeric, all Nominal/Ordinal, or a mix of Binary and Nominal/Ordinal.`));
        return;
    }

    if (all_numeric) { // All selected questions are numeric
        numericQuestionsToBinary(selected_questions);
    } else { // Nominal/Ordinal and/or Binary (guaranteed by the guard above)
        let compute_for_incomplete = true;
        // With a single question there is nothing to combine, so don't ask.
        if (selected_questions.length > 1)
            compute_for_incomplete = askYesNo("Compute for cases with incomplete data?");
        categoricalQuestionsToBinary(selected_questions, compute_for_incomplete);
    }
}

// Run the tool when the script is executed.
binaryVariables();

See also