Checking for Invalid Data
Jump to navigation
Jump to search
This script can be used to:
- Check that variables contain responses in the correct range
- Identify Categorical Variables that do not have value labels.
To run this script
- Change the name of the id variable that appears in quotes in the first line of code.
- Save this file to a location somewhere on your computer/network.
- Run the QScript.
This example can be run in C:\Program Files\Q\Examples\phone.sav (this may be located on a different place on your computer depending upon how Q was installed).
var id = "id"; //name of the unique id variable
var show_ids = true;
var conditions_to_check = [];
conditions_to_check.push(['q1',[1,2],""]); //checks to see if q1 contains values of 1 or 2
conditions_to_check.push(['q2',[],"true"]); //checks to see if q2 contains any values which have no labels + checks that all respondents have data
conditions_to_check.push(['q3',[],"q2 >= 5 && q2 <= 7"]); //checks for values without labels + checks to see if data is inconsistent with filter of "q2 >= 5 && q2 <= 7"
conditions_to_check.push(['q4',[],"true"]); //checks for values without labels + checks to see if data is inconsistent with filter of "q2 >= 5 && q2 <= 7"
conditions_to_check.push(['q5',[],"true"]); //checks for values without labels + checks to see if data is inconsistent with filter of "q2 >= 5 && q2 <= 7"
conditions_to_check.push(['Q5_1',[0,1],"true"]); //checks for values of 0 and 1 + checks to see if respondent has data
conditions_to_check.push(['Q5_2',[0,1],"true"]);
conditions_to_check.push(['Q5_3',[0,1],"true"]);
conditions_to_check.push(['Q5_4',[0,1],"true"]);
conditions_to_check.push(['Q5_5',[0,1],"true"]);
conditions_to_check.push(['Q5_6',[0,1],"true"]);
conditions_to_check.push(['Q5_7',[0,1],"true"]);
conditions_to_check.push(['Q5_8',[0,1],"true"]);
conditions_to_check.push(['Q5_9',[0,1],"true"]);
conditions_to_check.push(['Q5_10',[0,1],"true"]);
conditions_to_check.push(['Q5_11',[0,1],"true"]);
conditions_to_check.push(['Q6_1',[0,1],"Q5_1 == 0"]); // checks for values of 0 and 1 + checks that only asked to people with missing data in q5_1
conditions_to_check.push(['Q6_2',[0,1],"Q5_2 == 0"]);
if (project.dataFiles.length != 1)
alert("Warning: multiple data files exist in this project; only the first is being examined.");
var data = project.dataFiles[0];
var n_conditions = conditions_to_check.length;
//for (var data_i in project.dataFiles) {
// var data = project.dataFiles[data_i];
var invalid_values_prefix = "Invalid values: ";
var report = "Data cleaning report (NB: this report gives information for use in data cleaning but does not perform any cleaning)\r\n";
for (var condition=0; condition<n_conditions; condition++){
var details = conditions_to_check[condition];
var name = details[0];
var results = "";
var acceptable_values = details[1]
var check_using_labels = acceptable_values.length == 0;
var variable = data.getVariableByName(name).duplicate();
var label = variable.label;
var values = variable.uniqueValues;
var invalid_values_message = invalid_values_prefix;
//checking for out of range value
for (var i=0; i<values.length; i++){
var v = values[i];
if (!isNaN(v)){
var vstring = v.toString();
if (check_using_labels ? vstring == variable.valueAttributes.getLabel(v) : acceptable_values.indexOf(parseInt(v)) == -1)
invalid_values_message += vstring + " ";
}
}
variable.deleteVariable();
results += (invalid_values_message.length == invalid_values_prefix.length ? "" : invalid_values_message);
//checking skips
var filter_expression = details[2];
var t = project.report.appendTable();
if (filter_expression != ""){
//missing data
var filter_expression = details[2];
var temp_name = "Dummy5435643";
var v = data.getVariableByName(temp_name);
if (v != null)
v.deleteVariable();
var v = data.newJavaScriptVariable("(" + filter_expression + ") && isNaN(" + name + ")", false, temp_name, temp_name, null);
var q = project.dataFiles[0].getQuestionByName("Dummy5435643")
q.isFilter = true;
t.primary = data.getVariableByName(id).question;
t.secondary = "RAW DATA";
t.filters = [v];
var output = t.calculateOutput()
var ids = output.get('Values')
if (ids.length > 0){
var invalid_ids = "Missing data (n=" + ids.length + ") ";
if (show_ids){
invalid_ids += ": ";
for (var c=0; c<ids.length ; c++)
invalid_ids += ids[c][0] + " " ;
}
results += invalid_ids;
}
v.deleteVariable();
//Excess data
var v = data.newJavaScriptVariable("(!(" + filter_expression + ") && !isNaN(" + name + "))", false, temp_name, temp_name, null);
var q = project.dataFiles[0].getQuestionByName("Dummy5435643")
q.isFilter = true;
t.primary = data.getVariableByName(id).question;
t.secondary = "RAW DATA";
t.filters = [v];
var output = t.calculateOutput()
var ids = output.get('Values')
if (ids.length > 0){
var invalid_ids = "Excess data (n=" + ids.length + ") ";
if (show_ids){
invalid_ids += ": ";
for (var c=0; c<ids.length ; c++)
invalid_ids += ids[c][0] + " " ;
}
results += invalid_ids;
}
v.deleteVariable();
}
var res = name ;
for (var c=name.length; c<20; c++)
res = res + " ";
report += "\r\n" + res + (results.length == "" ? "OK" : results);
t.deleteItem();
}
log(report + '\r\n\r\nFinished!');
See also
- QScript for an explanation of how to run this code.
- QScript Reference for technical information.
- JavaScript for information about the JavaScript programming language.