Checking for Invalid Data

From Q
Jump to navigation Jump to search

This script can be used to:

  • Check that variables contain responses in the correct range
  • Identify Categorical Variables that do not have value labels.

To run this script

  1. Change the name of the id variable that appears in quotes in the first line of code.
  2. Save this file to a location somewhere on your computer/network.
  3. Run the QScript.

This example can be run in C:\Program Files\Q\Examples\phone.sav (this may be located on a different place on your computer depending upon how Q was installed).

var id = "id";  //name of the unique id variable
var show_ids = true;
var conditions_to_check = [];
conditions_to_check.push(['q1',[1,2],""]); //checks to see if q1 contains values of 1 or 2
conditions_to_check.push(['q2',[],"true"]); //checks to see if q2 contains any values which have no labels + checks that all respondents have data
conditions_to_check.push(['q3',[],"q2 >= 5 && q2 <= 7"]); //checks for values without labels + checks to see if data is inconsistent with filter of "q2 >= 5 && q2 <= 7"
conditions_to_check.push(['q4',[],"true"]); //checks for values without labels + checks to see if data is inconsistent with filter of "q2 >= 5 && q2 <= 7"
conditions_to_check.push(['q5',[],"true"]); //checks for values without labels + checks to see if data is inconsistent with filter of "q2 >= 5 && q2 <= 7"
conditions_to_check.push(['Q5_1',[0,1],"true"]); //checks for values of 0 and 1 + checks to see if respondent has data
conditions_to_check.push(['Q5_2',[0,1],"true"]); 
conditions_to_check.push(['Q5_3',[0,1],"true"]); 
conditions_to_check.push(['Q5_4',[0,1],"true"]); 
conditions_to_check.push(['Q5_5',[0,1],"true"]); 
conditions_to_check.push(['Q5_6',[0,1],"true"]); 
conditions_to_check.push(['Q5_7',[0,1],"true"]); 
conditions_to_check.push(['Q5_8',[0,1],"true"]); 
conditions_to_check.push(['Q5_9',[0,1],"true"]); 
conditions_to_check.push(['Q5_10',[0,1],"true"]);
conditions_to_check.push(['Q5_11',[0,1],"true"]);
conditions_to_check.push(['Q6_1',[0,1],"Q5_1 == 0"]); // checks for values of 0 and 1 + checks that only asked to people with missing data in q5_1
conditions_to_check.push(['Q6_2',[0,1],"Q5_2 == 0"]); 


if (project.dataFiles.length  != 1)
  alert("Warning: multiple data files exist in this project; only the first is being examined.");
var data = project.dataFiles[0];
 

var n_conditions = conditions_to_check.length;

//for (var data_i in project.dataFiles) {
//    var data = project.dataFiles[data_i];
var invalid_values_prefix = "Invalid values: ";
var report = "Data cleaning report (NB: this report gives information for use in data cleaning but does not perform any cleaning)\r\n";
for (var condition=0; condition<n_conditions; condition++){
  var details = conditions_to_check[condition];
  var name = details[0];
  var results = "";
  var acceptable_values = details[1]
  var check_using_labels = acceptable_values.length == 0;
  var variable = data.getVariableByName(name).duplicate();
  var label = variable.label;
  var values = variable.uniqueValues;
  var invalid_values_message = invalid_values_prefix;
  //checking for out of range value
  for (var i=0; i<values.length; i++){
  	var v = values[i];
  	if (!isNaN(v)){
           var vstring = v.toString();
           if (check_using_labels ? vstring == variable.valueAttributes.getLabel(v) : acceptable_values.indexOf(parseInt(v)) == -1)
                invalid_values_message += vstring + " ";
        }
  }
  variable.deleteVariable();
  results += (invalid_values_message.length == invalid_values_prefix.length ? "" : invalid_values_message);
  //checking skips
  var filter_expression = details[2];
  var t = project.report.appendTable();
  if (filter_expression != ""){
    //missing data
    var filter_expression = details[2];
    var temp_name = "Dummy5435643";
    var v = data.getVariableByName(temp_name);
    if (v != null) 
      v.deleteVariable();
    var v = data.newJavaScriptVariable("(" + filter_expression + ") && isNaN(" + name + ")", false, temp_name, temp_name, null);
    var q = project.dataFiles[0].getQuestionByName("Dummy5435643")
    q.isFilter = true;
    t.primary = data.getVariableByName(id).question;
    t.secondary = "RAW DATA";
    t.filters = [v];
    var output = t.calculateOutput()
    var ids = output.get('Values')
    if (ids.length > 0){
      var invalid_ids = "Missing data (n=" + ids.length + ") ";
      if (show_ids){
        invalid_ids += ": ";
        for (var c=0; c<ids.length ; c++)
	    invalid_ids += ids[c][0] + " " ;
      }
      results += invalid_ids;   
    }
    v.deleteVariable();
    //Excess data
    var v = data.newJavaScriptVariable("(!(" + filter_expression + ") && !isNaN(" + name + "))", false, temp_name, temp_name, null);
    var q = project.dataFiles[0].getQuestionByName("Dummy5435643")
    q.isFilter = true;
    t.primary = data.getVariableByName(id).question;
    t.secondary = "RAW DATA";
    t.filters = [v];
    var output = t.calculateOutput()
    var ids = output.get('Values')
    if (ids.length > 0){
      var invalid_ids = "Excess data (n=" + ids.length + ") ";
      if (show_ids){
        invalid_ids += ": ";
        for (var c=0; c<ids.length ; c++)
	    invalid_ids += ids[c][0] + " " ;
      }
      results += invalid_ids;   
    }
    v.deleteVariable();
  }
  var res = name ;
  for (var c=name.length; c<20; c++)
    res = res + " ";
  report += "\r\n" + res + (results.length == "" ? "OK" : results);
  t.deleteItem();
}
log(report + '\r\n\r\nFinished!');


See also