Create New Variables - Duplicates
Create a new variable which identifies the cases in a data set which have duplicated values in one or more variables. This is useful when wanting to identify cases to delete from the data set.
The Duplicates feature can take a single variable input, or it can take multiple variables as inputs. The new variable has two categories, Yes and No, which indicate which cases are and are not duplicated, respectively. Duplicates are identified starting at the top of the data file and moving down. The first case with a particular value (or combination of values if you have selected more than one variable) will be assigned a value of No, and then any cases lower down in the file that have identical values will be assigned a value of Yes.
Usage
- Select one or more variables in the Variables and Questions tab.
- Select Automate > Browse Online Library > Create New Variables > Duplicates.
If you wish to change the variables that are being used to identify duplicates, then:
- Select the Duplicates variable in the Variables and Questions tab.
- Right-click and select Edit R Variable.
- Choose variables in the Variables box.
- Click Update R Variable.
How to apply this QScript
- Start typing the name of the QScript into the Search features and data box in the top right of the Q window.
- Click on the QScript when it appears in the QScripts and Rules section of the search results.
OR
- Select Automate > Browse Online Library.
- Select this QScript from the list.
Customizing the QScript
This QScript is written in JavaScript and can be customized by copying and modifying the JavaScript.
Customizing QScripts in Q4.11 and more recent versions
- Start typing the name of the QScript into the Search features and data box in the top right of the Q window.
- Hover your mouse over the QScript when it appears in the QScripts and Rules section of the search results.
- Press Edit a Copy (bottom-left corner of the preview).
- Modify the JavaScript (see QScripts for more detail on this).
- Either:
- Run the QScript, by pressing the blue triangle button.
- Save the QScript and run it at a later time, using Automate > Run QScript (Macro) from File.
Customizing QScripts in older versions
JavaScript
includeWeb('QScript Selection Functions');
includeWeb('QScript Utility Functions');
includeWeb('QScript Value Attributes Functions');
/**
 * Creates a new categorical R variable that flags duplicated cases, based on
 * the variables the user has selected (or, in Q with nothing selected, the
 * variables chosen from a prompt).
 *
 * The new variable is computed with R's `duplicated()` over the chosen
 * variables, its two categories are labelled 'No' and 'Yes', and it is
 * inserted after the last chosen variable. In Q (not Displayr) a table of the
 * new variable is also appended to the report and selected.
 *
 * @returns {boolean} `true` when the variable was created; `false` when any
 *     precondition fails (a message explaining why is logged first).
 */
function appendDuplicatedCasesVariableToDataSet() {
    const q_allowed_version = 20.14;
    const is_displayr = inDisplayr();
    if (!is_displayr && Q.fileFormatVersion() < q_allowed_version) {
        log("This feature requires a newer version of Q. Please contact support@q-researchsoftware.com");
        return false;
    }

    const user_selections = getAllUserSelections();
    let selected_variables = user_selections.selected_variables;
    const selected_data_files = user_selections.selected_data_sets;
    if (selected_data_files.length > 1) {
        log('The selected variables come from more than one Data Set. Select variables from the same dataset and run this feature again.');
        return false;
    }

    let data_file = selected_data_files[0];
    if (selected_variables.length === 0) {
        if (is_displayr) {
            log('No variables are selected. Select one or more variables under Data Sets and run this feature again.');
            return false;
        }
        // In Q, fall back to prompting for a data file and then for variables.
        data_file = requestOneDataFileFromProject();
        const candidate_vars = data_file.variables.filter(
            v => !v.isHidden && !v.question.isBanner && v.question.isValid);
        const prompt = "Select one or more variables to use to identify duplicate cases:";
        selected_variables = selectManyVariablesByQuestionNameAndLabel(prompt,
            candidate_vars, false).variables;
    }

    // Defensive guard: everything below needs at least one variable (the last
    // one is used as the insertion point for the new variable).
    if (selected_variables.length === 0) {
        log('No variables were selected. Select one or more variables and run this feature again.');
        return false;
    }

    if (selected_variables.some(v => v.isHidden)) {
        log("Some of the selected variables are hidden. Unhide these variables and then run this option again.");
        // Return false like every other failure path (previously a bare return).
        return false;
    }

    // R code evaluated for the new variable; formInputVariables is supplied by
    // the drop box control defined in js_code below.
    const r_code = `
duplicated(QDataFrame(formInputVariables))
`;
    // Control definition shown when the R variable is edited.
    const js_code = `
form.dropBox({multi:true,
label: "Variables ",
name: "formInputVariables",
types: ["Variable", "Question"]});
`;

    // Pre-populate the drop box with the chosen variables, ';'-separated guids.
    const guids = selected_variables.map(selection => selection.guid);
    const control_settings = {'formInputVariables': guids.join(';')};

    const new_variable_name = preventDuplicateVariableName(data_file, 'duplicated_cases', '_');
    const new_variable_label = preventDuplicateQuestionName(data_file, 'Duplicates');
    const last_variable = selected_variables[selected_variables.length - 1];
    const new_variable = data_file.newRVariable(r_code, new_variable_name, new_variable_label,
        last_variable, js_code, control_settings);

    new_variable.variableType = 'Categorical';
    setLabelForVariablesInQuestion(new_variable.question, 0, 'No');
    setLabelForVariablesInQuestion(new_variable.question, 1, 'Yes');
    insertAtHoverButtonIfShown(new_variable.question);

    if (!is_displayr) {
        // In Q, show the result straight away in a new, selected table.
        const new_table = project.report.appendTable();
        new_table.primary = new_variable.question;
        project.report.setSelectedRaw([new_table]);
    }
    return true;
}
// Run the feature as soon as the QScript is executed; the return value is not used here.
appendDuplicatedCasesVariableToDataSet();
See also
- QScript for more general information about QScripts.
- QScript Examples Library for other examples.
- Online JavaScript Libraries for the libraries of functions that can be used when writing QScripts.
- QScript Reference for information about how QScript can manipulate the different elements of a project.
- JavaScript for information about the JavaScript programming language.
- Table JavaScript and Plot JavaScript for tools for using JavaScript to modify the appearance of tables and charts.
Q Technical Reference
Q Technical Reference
Q Technical Reference
Q Technical Reference > Setting Up Data > Creating New Variables
Q Technical Reference > Setting Up Data > Creating New Variables
Q Technical Reference > Updating and Automation > Automation Online Library
Q Technical Reference > Updating and Automation > JavaScript > QScript > QScript Examples Library > QScript Online Library