Create New Variables - Variable(s) with Outliers Removed
Create new variable(s) with any outlying values in the selected variable(s) replaced with NaN
This tool checks the selected data for outliers and creates new copies of the data with the outliers removed. Outliers are defined as values that are not within a certain number of standard deviations from the variable mean, and you can choose how many standard deviations are used to determine which values are considered to be outliers. The default value is 3 standard deviations. The new copies of data will have the outlying values replaced with missing values. Data that does not contain outliers will not be copied.
A new folder will be created in the report tree that contains tables for the selected data and any new copies of data with the outliers removed.
The new copies of variables use a JavaScript formula to assign a value of NaN to respondents with outlying values. The means and standard deviations are determined when this script is run. As a result, the definition of an outlier in variables where the outliers have been removed will not be updated if the underlying data changes.
How to apply this QScript
- Start typing the name of the QScript into the Search features and data box in the top right of the Q window.
- Click on the QScript when it appears in the QScripts and Rules section of the search results.
OR
- Select Automate > Browse Online Library.
- Select this QScript from the list.
Customizing the QScript
This QScript is written in JavaScript and can be customized by copying and modifying the JavaScript.
Customizing QScripts in Q4.11 and more recent versions
- Start typing the name of the QScript into the Search features and data box in the top right of the Q window.
- Hover your mouse over the QScript when it appears in the QScripts and Rules section of the search results.
- Press Edit a Copy (bottom-left corner of the preview).
- Modify the JavaScript (see QScripts for more detail on this).
- Either:
- Run the QScript, by pressing the blue triangle button.
- Save the QScript and run it at a later time, using Automate > Run QScript (Macro) from File.
Customizing QScripts in older versions
JavaScript
// Pull in the online QScript libraries by name. Presumably these provide the
// helpers used below (e.g. selectInputQuestions, simpleHTMLReport) — confirm.
includeWeb("QScript Selection Functions");
includeWeb("QScript Functions to Generate Outputs");
includeWeb("QScript Table Functions");
// Entry point: run the script. outliersRemoved is a function declaration, so
// it is hoisted and can be called before its definition appears.
outliersRemoved()
/**
 * Checks the selected numeric questions for outliers and, for each question
 * containing at least one, creates a copy in which outlying values are
 * replaced with NaN.
 *
 * The user is prompted for the cut-off (number of standard deviations from the
 * mean). Bounds for every variable come from checkVariableForOutliers and are
 * baked into the JavaScript expression of the new variables, so they are fixed
 * at the time the script runs.
 *
 * Outside Displayr a report group is created holding a summary table for each
 * visible question, a table for each cleaned copy, and an HTML summary. In
 * Displayr the outcome is only written to the log.
 *
 * @returns {boolean} true when the script ran to completion; false when no
 *   valid questions were selected, the prompt returned no value, or a new
 *   variable could not be created.
 */
function outliersRemoved() {
    const web_mode = inDisplayr();
    const allowed_types = ["Numeric", "Numeric - Multi", "Numeric - Grid"];
    const table_statistics = ['Average', 'Standard Deviation', 'Minimum', 'Maximum', 'Base n', 'Missing n'];

    const questions = selectInputQuestions(allowed_types);
    if (!questions)
        return false;
    if (!areQuestionsValidAndNonEmpty(questions))
        return false;

    // Specify the number of standard deviations that defines an 'outlier'.
    // Only a finite, strictly positive number is meaningful: zero would flag
    // every value that differs from the mean, and a negative value inverts
    // the permissible range.
    const isValidCutOff = (value) => isFinite(value) && value > 0;
    let number_sd_from_mean = NaN;
    while (!isValidCutOff(number_sd_from_mean)) {
        const answer = prompt("Enter the cut-off value to use to identify outliers. Cases whose value is not within this many standard deviations from the mean will be considered outliers.", 3);
        // No answer at all is treated as a cancel rather than re-prompting forever.
        if (answer == null)
            return false;
        // Number("") is 0, so blank input is rejected explicitly as not-a-number.
        const is_blank = typeof answer === "string" && answer.trim() === "";
        number_sd_from_mean = is_blank ? NaN : Number(answer);
        if (!isValidCutOff(number_sd_from_mean))
            alert('The cut-off value must be a positive number.');
    }

    // Begin the report
    let paragraphs = ["The tables below show data that has been checked for outliers.",
        "Test: value is more than " + number_sd_from_mean + " standard deviations from the mean.",
        "Where the data contains outliers, a copy of the data is made and it is labeled as 'OUTLIERS REMOVED'",
        " "];

    // The report group (and its tables) is only built outside Displayr.
    let group;
    if (!web_mode) {
        group = project.report.appendGroup();
        group.name = "Checked for outliers";
    }

    // Appends a SUMMARY table for the given question to the report group.
    const appendSummaryTable = (primary_question) => {
        const table = group.appendTable();
        table.primary = primary_question;
        table.secondary = "SUMMARY";
        table.cellStatistics = table_statistics.slice();
    };

    // Check each of the input questions and construct new variables where needed.
    // A plain loop (not forEach) is used so that `return false` below really
    // aborts the script instead of only leaving a callback.
    let outliers_found = false;
    const outlier_list = [];
    const no_outliers_list = [];
    for (let q = 0; q < questions.length; q++) {
        const question = questions[q];
        if (question.isHidden)
            continue;
        const data_file = question.dataFile;

        // Make a table for the original question (Q only)
        if (!web_mode)
            appendSummaryTable(question);

        // Check the statistics for each variable in the question to determine
        // if there are any outliers
        const variables = question.variables;
        const outlier_data = variables.map((v) => checkVariableForOutliers(v, number_sd_from_mean));
        if (!outlier_data.some((obj) => obj.hasOutliers)) {
            no_outliers_list.push(question.name);
            continue;
        }

        // At least one variable has outliers: build a full copy of the question.
        outliers_found = true;
        outlier_list.push(question.name);
        const new_variables = [];
        for (let v = 0; v < variables.length; v++) {
            const v_name = variables[v].name;
            const bounds = outlier_data[v];
            // Variables without outliers are copied unchanged; this also avoids
            // blanking a variable whose bounds are degenerate (e.g. NaN).
            // The comparison is inclusive so that it matches the detection rule
            // (a value is an outlier only when strictly outside the bounds).
            const expression = bounds.hasOutliers
                ? "if (" + v_name + " >= " + bounds.minPermissable + " && " + v_name + " <= " + bounds.maxPermissable + ") " + v_name + "; else NaN"
                : v_name;
            try {
                new_variables.push(data_file.newJavaScriptVariable(expression, false, preventDuplicateVariableName(data_file, v_name + "_noOutliers"), variables[v].label, null));
            } catch (e) {
                log("Could not identify outliers in " + v_name + ": " + e);
                return false;
            }
        }
        data_file.moveAfter(new_variables, variables[variables.length - 1]);
        const new_question = data_file.setQuestion(preventDuplicateQuestionName(data_file, question.name + " OUTLIERS REMOVED"), question.questionType, new_variables);
        insertAtHoverButtonIfShown(new_question);
        if (!web_mode)
            appendSummaryTable(new_question);
    }

    if (!web_mode) {
        if (outliers_found) {
            paragraphs.push("Outliers found in:");
            paragraphs.push("");
            paragraphs = paragraphs.concat(outlier_list);
        } else {
            paragraphs.push('No outliers found');
        }
        simpleHTMLReport(paragraphs, "Checked for outliers", group, true, false);
    } else { // In Displayr, just report what was done in a log.
        if (!outliers_found) {
            log("No outliers were detected in the selected data.");
        } else if (no_outliers_list.length > 0) {
            log("Some of the selected variable sets did not contain outliers:\r\n");
            log(no_outliers_list.join("\r\n"));
        }
    }
    return true;
}
/**
 * Checks a variable for outliers.
 *
 * Computes the unweighted mean and sample standard deviation (n - 1
 * denominator) of the variable's raw values, ignoring null and NaN entries,
 * and tests whether the smallest or largest value lies strictly outside
 * mean +/- number_sd_from_mean * sd.
 *
 * When the standard deviation cannot be used to define a range (fewer than
 * two valid values, or no spread at all) nothing can be an outlier, so the
 * permissible range is reported as unbounded. This stops callers that filter
 * on the returned bounds from discarding every value.
 *
 * @param {{rawValues: Array}} variable - object exposing the values to check.
 * @param {number|string} number_sd_from_mean - cut-off in standard deviations.
 * @returns {{hasOutliers: boolean, maxPermissable: number, minPermissable: number}}
 */
function checkVariableForOutliers(variable, number_sd_from_mean) {
    const raw_values = variable.rawValues;
    const isValid = (x) => x != null && !isNaN(x);

    // First pass: count, total and range of the valid values (no weights used)
    let n = 0;
    let total = 0;
    let min = Infinity;
    let max = -Infinity;
    for (let i = 0; i < raw_values.length; i++) {
        const x = raw_values[i];
        if (!isValid(x))
            continue;
        total += x;
        n++;
        if (x > max)
            max = x;
        if (x < min)
            min = x;
    }
    const mean = total / n;

    // Second pass: sum of squared deviations from the mean
    let sum_squares = 0;
    for (let i = 0; i < raw_values.length; i++) {
        const x = raw_values[i];
        if (isValid(x))
            sum_squares += (x - mean) * (x - mean);
    }
    const sd = Math.sqrt(sum_squares / (n - 1));

    // n < 2 gives NaN here and constant data gives 0; neither defines a range.
    if (!(sd > 0) || !isFinite(sd))
        return { hasOutliers: false, maxPermissable: Infinity, minPermissable: -Infinity };

    const min_permissable = mean - number_sd_from_mean * sd;
    const max_permissable = mean + number_sd_from_mean * sd;
    const outliers = min < min_permissable || max > max_permissable;
    return { hasOutliers: outliers, maxPermissable: max_permissable, minPermissable: min_permissable };
}
See also
- QScript for more general information about QScripts.
- QScript Examples Library for other examples.
- Online JavaScript Libraries for the libraries of functions that can be used when writing QScripts.
- QScript Reference for information about how QScript can manipulate the different elements of a project.
- JavaScript for information about the JavaScript programming language.
- Table JavaScript and Plot JavaScript for tools for using JavaScript to modify the appearance of tables and charts.
Displayr - Anything Menu
Displayr - Insert
Displayr - New Variable Menu
Extensions
Q Technical Reference
Q Technical Reference
Q Technical Reference
Q Technical Reference
Q Technical Reference > Setting Up Data > Creating New Variables
Q Technical Reference > Setting Up Data > Creating New Variables
Q Technical Reference > Setting Up Data > Data Cleaning QScripts
Q Technical Reference > Updating and Automation > Automation Online Library
Q Technical Reference > Updating and Automation > JavaScript > QScript > QScript Examples Library
Q Technical Reference > Updating and Automation > JavaScript > QScript > QScript Examples Library > QScript Online Library
User Interface > Transformation