Create New Variables - Duplicates

From Q
Jump to navigation Jump to search

Create a new variable which identifies the cases in a data set which have duplicated values in one or more variables. This is useful when wanting to identify cases to delete from the data set.

The Duplicates feature can take a single variable input, or it can take multiple variables as inputs. The new variable has two categories, Yes and No, which indicate which cases are and are not duplicated, respectively. Duplicates are identified starting at the top of the data file and moving down. The first case with a particular value (or combination of values if you have selected more than one variable) will be assigned a value of No, and then any cases lower down in the file that have identical values will be assigned a value of Yes.

Usage

  1. Select one or more variables in the Variables and Questions tab.
  2. Select Automate > Browse Online Library > Create New Variables > Duplicates.

If you wish to change the variables that are being used to identify duplicates, then

  1. Select the Duplicates variable in the Variables and Questions tab.
  2. Right-click and select Edit R Variable.
  3. Choose variables in the Variables box.
  4. Click Update R Variable.

In Displayr:

  1. Select one or more variables under Data Sets.
  2. Click the variable-hover (+) button to the right of your variable(s).
  3. Select Ready-Made New Variables > Duplicates.

If you wish to change the variables that are being used to identify duplicates, then select the Duplicates variable under Data Sets and then use the Variables section of the menu on the right-hand side.

How to apply this QScript

  • Start typing the name of the QScript into the Search features and data box in the top right of the Q window.
  • Click on the QScript when it appears in the QScripts and Rules section of the search results.

OR

  • Select Automate > Browse Online Library.
  • Select this QScript from the list.

Customizing the QScript

This QScript is written in JavaScript and can be customized by copying and modifying the JavaScript.

Customizing QScripts in Q4.11 and more recent versions

  • Start typing the name of the QScript into the Search features and data box in the top right of the Q window.
  • Hover your mouse over the QScript when it appears in the QScripts and Rules section of the search results.
  • Press Edit a Copy (bottom-left corner of the preview).
  • Modify the JavaScript (see QScripts for more detail on this).
  • Either:
    • Run the QScript, by pressing the blue triangle button.
    • Save the QScript and run it at a later time, using Automate > Run QScript (Macro) from File.

Customizing QScripts in older versions

  • Copy the JavaScript shown on this page.
  • Create a new text file, giving it a file extension of .QScript. See here for more information about how to do this.
  • Modify the JavaScript (see QScripts for more detail on this).
  • Run the file using Automate > Run QScript (Macro) from File.

JavaScript

includeWeb('QScript Selection Functions');
includeWeb('QScript Utility Functions');
includeWeb('QScript Value Attributes Functions');

/**
 * Creates a new R variable that flags duplicated cases, based on the
 * variables the user has selected (or, in Q with nothing selected, the
 * variables chosen from a prompt).
 *
 * The new variable runs `duplicated(QDataFrame(formInputVariables))`, is set
 * to the 'Categorical' variable type, and has the labels 'No' and 'Yes'
 * applied for the values 0 and 1 via setLabelForVariablesInQuestion.
 * In Q (not Displayr) a table showing the new variable is also appended to
 * the report and selected.
 *
 * Every failure path logs a message for the user and returns false.
 *
 * @returns {boolean} true when the variable was created; false when the Q
 *     version is too old, the selection spans several data sets, no variables
 *     are selected, or any selected variable is hidden.
 */
function appendDuplicatedCasesVariableToDataSet() {

    const q_allowed_version = 20.14;
    const is_displayr = inDisplayr();
    // The version check only applies to Q; Displayr is never version-gated here.
    if (!is_displayr && Q.fileFormatVersion() < q_allowed_version) {
        log("This feature requires a newer version of Q. Please contact support@q-researchsoftware.com");
        return false;
    }

    const user_selections = getAllUserSelections();
    let selected_variables = user_selections.selected_variables;

    const selected_data_files = user_selections.selected_data_sets;
    if (selected_data_files.length > 1) {
        log('The selected variables come from more than one Data Set. Select variables from the same dataset and run this feature again.');
        return false;
    }
    let data_file = selected_data_files[0];

    if (selected_variables.length === 0) {
        if (is_displayr) {
            log('No variables are selected. Select one or more variables under Data Sets and run this feature again.');
            return false;
        }

        // In Q, fall back to prompting for a data file and then for variables.
        data_file = requestOneDataFileFromProject();
        const candidate_vars = data_file.variables.filter(function (v) {
            return !v.isHidden && !v.question.isBanner && v.question.isValid;
        });
        const prompt = "Select one or more variables to use to identify duplicate cases:";
        selected_variables = selectManyVariablesByQuestionNameAndLabel(prompt,
                                                                       candidate_vars, false).variables;

        // Without at least one variable there is nothing to compare and no
        // variable to position the new one against, so stop here.
        if (selected_variables.length === 0) {
            log('No variables are selected. Select one or more variables and run this feature again.');
            return false;
        }
    }

    if (selected_variables.some(v => v.isHidden)) {
        log("Some of the selected variables are hidden. Unhide these variables and then run this option again.");
        // Return false (not undefined) so all failure paths agree.
        return false;
    }

    const r_code = `
    duplicated(QDataFrame(formInputVariables))
    `;

    // Form definition for the new variable: one multi-select drop box whose
    // name matches the identifier used in the R code above.
    const js_code = `
    form.dropBox({multi:true,
                  label: "Variables ",
                  name: "formInputVariables",
                  types: ["Variable", "Question"]});
    `;

    // Pre-fill the drop box with the selected variables (GUIDs joined by ';').
    const guids = selected_variables.map(selection => selection.guid);
    const control_settings = {'formInputVariables': guids.join(';')};

    const new_variable_name = preventDuplicateVariableName(data_file, 'duplicated_cases', '_');
    const new_variable_label = preventDuplicateQuestionName(data_file, 'Duplicates');

    // The last selected variable is passed as the positional anchor for the
    // new variable (presumably "insert after" -- confirm against the
    // newRVariable API documentation).
    const last_variable = selected_variables[selected_variables.length - 1];
    const new_variable = data_file.newRVariable(r_code, new_variable_name, new_variable_label,
                                                last_variable, js_code, control_settings);
    new_variable.variableType = 'Categorical';
    setLabelForVariablesInQuestion(new_variable.question, 0, 'No');
    setLabelForVariablesInQuestion(new_variable.question, 1, 'Yes');
    insertAtHoverButtonIfShown(new_variable.question);

    if (!is_displayr) {
        // In Q, show the result straight away in a new, selected table.
        const new_table = project.report.appendTable();
        new_table.primary = new_variable.question;
        project.report.setSelectedRaw([new_table]);
    }
    return true;
}

// Entry point: invoke the feature when this QScript is run.
appendDuplicatedCasesVariableToDataSet();

See also