Create New Variables - Rank Within Case

From Q
Jump to: navigation, search

This QScript transformation creates an R variable that shows the ranks of the responses within each case.

Example

Consider the readership dataset below

Ranking within case will apply the rank transformation for each row in the data to yield the following


Technical details

Ranking is a statistical transformation that can be used on Numeric and Ordinal data that replaces the source values in the data with their rank value after being sorted. See, for example, the Wikipedia page for information on a rank transformation. To use this transformation, the user selects one or more questions that are Number/Numeric or Categorical variables. For Number/Numeric variables, the ranking is determined with the largest value in the variable taking the rank of 1, the second largest taking the rank of 2 and so on until the values have been ranked. If there are any ties in the data, then they share the average rank. For Categorical variables, a similar procedure applies except that the categorical value attribute labels are mapped to their source numeric values to determine the ranking.

How to apply this QScript

  • Start typing the name of the QScript into the Search features and data box in the top right of the Q window.
  • Click on the QScript when it appears in the QScripts and Rules section of the search results.

OR

  • Select Automate > Browse Online Library.
  • Select this QScript from the list.

Customizing the QScript

This QScript is written in JavaScript and can be customized by copying and modifying the JavaScript.

Customizing QScripts in Q4.11 and more recent versions

  • Start typing the name of the QScript into the Search features and data box in the top right of the Q window.
  • Hover your mouse over the QScript when it appears in the QScripts and Rules section of the search results.
  • Press Edit a Copy (bottom-left corner of the preview).
  • Modify the JavaScript (see QScripts for more detail on this).
  • Either:
    • Run the QScript, by pressing the blue triangle button.
    • Save the QScript and run it at a later time, using Automate > Run QScript (Macro) from File.

Customizing QScripts in older versions

  • Copy the JavaScript shown on this page.
  • Create a new text file, giving it a file extension of .QScript. See here for more information about how to do this.
  • Modify the JavaScript (see QScripts for more detail on this).
  • Run the file using Automate > Run QScript (Macro) from File.

JavaScript

includeWeb("QScript Utility Functions");
includeWeb("QScript Selection Functions");
includeWeb("QScript Functions to Generate Outputs");
includeWeb("QScript R Output Functions");

// Reports whether variable_name is ambiguous across the project: returns true
// unless exactly one variable (over all data files) has that string as its
// name or as its label. Zero matches therefore also count as "duplicate".
function checkDuplicateVariable(variable_name) {
	let match_count = 0;
	project.dataFiles
		.map(data_file => data_file.variables)
		.flat()
		.forEach(candidate => {
			if (candidate.name === variable_name || candidate.label === variable_name) {
				match_count += 1;
			}
		});
	return match_count !== 1;
}

// Returns the labels of a variable's unique values, skipping any value whose
// label is a "don't know" label, whose value attribute is not a number, or
// which is flagged as missing data in the question's value attributes.
function variableToLabels(variable) {
	var parent_question = variable.question;
	var value_attributes = parent_question.valueAttributes;
	var isRelevant = function(raw_value) {
		if (isDontKnow(value_attributes.getLabel(raw_value))) {
			return false;
		}
		if (isNaN(value_attributes.getValue(raw_value))) {
			return false;
		}
		return !value_attributes.getIsMissingData(raw_value);
	};
	var kept_values = variable.uniqueValues.filter(isRelevant);
	return getLabelsForValues(parent_question, kept_values);
}

// Array.prototype.filter callback that keeps only the first occurrence of
// each value: an element passes when its position is the first index at
// which that value appears in the array being filtered.
function onlyUnique(value, index, self) {
	var first_position = self.indexOf(value);
	return first_position === index;
}

// Builds the display label for a variable. When the parent question's
// variableSetStructure contains "- Multi", the question name is prepended to
// the variable label; otherwise the variable label is returned on its own.
function getVariableOrQuestionLabel(variable) {
	var belongs_to_multi = /- Multi/.test(variable.question.variableSetStructure);
	if (!belongs_to_multi) {
		return variable.label;
	}
	return variable.question.name + " " + variable.label;
}
	
// Creates a new R question in data_file holding, for every case (row), the
// ranks of the supplied variables.
//
// data_file   - data file that receives the new R question.
// variables   - input variables; each supplies one column of the R data frame.
// is_displayr - true when running in Displayr; controls wording and skips the
//               Q-only steps (rename prompt, summary table group, selection).
// questions   - the questions the variables came from; only used to decide
//               the question type of the output.
//
// Returns true when the question was created and false when newRQuestion (or
// the follow-up configuration) threw; the error is logged in that case.
//
// NOTE(review): R's rank() assigns rank 1 to the smallest value, whereas the
// description on this page says the largest value takes rank 1 - confirm
// which behaviour is intended.
function rankWithinCase(data_file, variables, is_displayr, questions) {
	var structure_name = getVariableNaming(is_displayr);
	var is_single_variable = variables.length === 1;
	var expr_labels = variables.map(v => v.label);
	var question_name = variables.map(v => v.question.name).filter(onlyUnique).join(" + ");
	var new_question_name = preventDuplicateQuestionName(data_file, "rank within case for " + question_name);
	// Derive a variable-name prefix from the question name by replacing every
	// character outside the allowed set with an underscore.
	var new_var_name = new_question_name.replace(/[^a-zA-Z0-9_@\#\$\\]/g, '_').toLowerCase() + "_";
	new_var_name = randomVariableName(16, new_var_name);
	// Ambiguous variable names need the disambiguated form to be referenced from R.
	var expr_names = variables.map(v => {
		return checkDuplicateVariable(v.name) ? generateDisambiguatedVariableName(v) : stringToRName(v.name);
	});
	var last_variable = getLastVariable(variables);
	// One "label = variable" data.frame argument per input variable.
	var expr_name = expr_labels.map((label, ind) => stringToRName(label) + " = " + expr_names[ind]);
	var def_prefix = 'x <- data.frame(';
	// Continuation lines are padded so the arguments line up under the first one.
	var white_spaces = " ".repeat(def_prefix.length);
	// With several variables apply() returns one column per case, so the result
	// is transposed; with a single variable it is stored in y and returned as is.
	var exp_prefix = is_single_variable ? "y <- " : "t(";
	var exp_suffix = is_single_variable ? "" : ")";
	var expression = def_prefix + expr_name.join(",\n" + white_spaces) + ')\n' +
					 exp_prefix + 'apply(x, MARGIN = 1, FUN = rank, na.last = "keep", ties.method = "average")' + exp_suffix;
	var warning_message;
	if (is_single_variable) {
		warning_message = 'Ranking within case for a single variable will always produce a rank of 1 for each case since there is only one ' +
				'observation per case. Please select more than one variable or a ' + structure_name.slice(0, -1) + ' with multiple variables to produce a non-degenerate ranking';
		if (is_displayr) {
			// In Displayr the warning is raised from the R code itself.
			expression += '\n' + 'warning("' + warning_message + '")\n' + 'y';
		} else {
			expression += '\n' + 'y';
		}
	}
	var new_r_question;
	try {
		new_r_question = data_file.newRQuestion(expression, new_question_name, new_var_name, last_variable);
		new_r_question.variables.forEach((v, vind) => v.label = expr_labels[vind]);
		if (is_single_variable || questions.length === 1) {
			if (/Number/.test(questions[0].questionType)) {
				new_r_question.questionType = questions[0].questionType;
			} else if (/Multi$/.test(questions[0].questionType)) {
				new_r_question.questionType = "Number - Multi";
			}
		}
	} catch (e) {
		log("The rank transform could not be computed for this " + structure_name + " : " + e);
		return false;
	}
	if (!is_displayr) {
		// In Q the degenerate-ranking warning goes to the log instead of R.
		if (is_single_variable) {
			log(warning_message);
		}
		var new_name = prompt("Enter a name for the new rank within case question:", new_r_question.name);
		// Keep the generated name when the prompt yields nothing usable
		// (assumes an empty/null result means "no new name" - TODO confirm).
		if (new_name && new_name !== new_r_question.name) {
			new_r_question.name = new_name;
		}
		var top_group_name = "Ranked Within Case Transformation";
		var new_group = generateGroupOfSummaryTables(top_group_name, [new_r_question]);
		// More recent Q versions can point the user to the new items.
		if (fileFormatVersion() > 8.65) {
			project.report.setSelectedRaw([new_group.subItems[0]]);
		} else {
			log("rank within case question named '" + new_r_question.name + "' has been added to the dataset " + data_file.name);
		}
	}
	return true;
}

// Joins a list into a readable phrase: items are separated by ", " and the
// final item is attached with the conjunction, e.g. "a, b or c".
//
// x           - array of items to list.
// conjunction - text placed before the final item; defaults to " or " when
//               omitted or empty.
//
// A single-item list returns just that item and an empty list returns "",
// so no dangling conjunction is produced.
function printTypes(x, conjunction) {
	var joiner = (typeof(conjunction) === "undefined" || !conjunction) ? " or " : conjunction;
	if (x.length === 0) {
		return "";
	}
	if (x.length === 1) {
		return String(x[0]);
	}
	var comma_separated = x.slice(0, x.length - 1);
	return comma_separated.join(", ") + joiner + x[x.length - 1];
}

// Checks that two arrays have the same length and pairwise equal elements in
// the same order. Arrays of different lengths are never equal, so extra
// trailing elements in either array are not ignored.
// Elements are compared with loose equality (==), as the original check did,
// so values that coerce to each other (e.g. "1" and 1) still match.
function arraysEqual(array_1, array_2) {
	if (array_1.length !== array_2.length) {
		return false;
	}
	return array_1.every(function (label, ind) {
		return label == array_2[ind];
	});
}
// Checks that the selected questions can be ranked together.
//
// questions      - the selected questions / variable sets.
// structure_name - unused; kept so existing calls keep working.
// is_displayr    - selects the wording ("variable sets" or "questions")
//                  passed on to userFeedback when a check fails.
//
// When the first question's variableSetStructure starts with "Numeric" no
// label check is done. Otherwise every variable must have the same number of
// relevant labels and the same labels in the same order. Returns true when
// the checks pass; on failure userFeedback is called and false is returned.
function checkStructureAndLabels(questions, structure_name, is_displayr) {
	var variable_set_structures = questions.map(x => x.variableSetStructure);
	// Labels unimportant for Numeric but need to be checked for Categorical
	if (/^Numeric/.test(variable_set_structures[0])) {
		return true;
	}
	var all_variables = getVariablesFromQuestions(questions);
	var all_labels = all_variables.map(x => variableToLabels(x));

	// Check lengths
	var same_length = all_labels.every(x => x.length === all_labels[0].length);
	if (!same_length) {
		userFeedback(all_variables, all_labels, "length", getVariableNaming(is_displayr));
		return false;
	}
	// Check equal elements in same order
	var same_labels = all_labels.every(label_array => arraysEqual(label_array, all_labels[0]));
	if (!same_labels) {
		userFeedback(all_variables, all_labels, "not all equal", getVariableNaming(is_displayr));
		return false;
	}
	return true;
}

// Logs a message explaining why the labels of the selected variables are not
// compatible, quoting the first variable and the first variable that
// differs from it.
//
// all_variables   - the variables that were checked.
// variable_labels - array of label arrays, parallel to all_variables.
// error_type      - "length" when the label counts differ; any other value
//                   is treated as "labels differ or are in a different order".
// structure_name  - "variable sets" or "questions", used in the wording.
//
// If no variable actually differs from the first one, nothing is logged.
function userFeedback(all_variables, variable_labels, error_type, structure_name) {
	var reference_labels = variable_labels[0];
	var is_length_error = error_type === "length";
	// Index of the first variable whose labels disagree with the first variable's.
	var idx = variable_labels.findIndex(function (labels) {
		return is_length_error ? labels.length !== reference_labels.length : !arraysEqual(labels, reference_labels);
	});
	if (idx === -1) {
		return;
	}
	var pre_message;
	var post_message;
	if (is_length_error) {
		pre_message = "The length of the labels should be the same for all selected variables. " +
			"However, the selected variables don't have the same label lengths. ";
		post_message = " If a label was miscoded consider excluding it from analysis before running the rank transform again.";
	} else {
		pre_message = "The labels from these " + structure_name + " do not match, and so the questions cannot be combined. ";
		post_message = " Note that the order of the labels need to match for all selected questions the transform to occur.";
	}
	log(pre_message + "For example, the variable '" + getVariableOrQuestionLabel(all_variables[0]) + "' has " + reference_labels.length +
			" labels :" + printTypes(reference_labels, " and ") + " while the variable '" + getVariableOrQuestionLabel(all_variables[idx]) +
			"' has " + variable_labels[idx].length + " labels :" + printTypes(variable_labels[idx], " and ") + "." +
		post_message);
}


// Logs a message explaining that the selection mixes incompatible types.
//
// variable_feedback - type/structure name of each selected item, in order.
// is_displayr       - chooses Displayr wording ("variable sets",
//                     "transformation") over Q wording ("questions", "QScript").
// mixed_message     - optional text appended to the message; when omitted
//                     the sentence is simply closed with a full stop.
function differentTypeFeedback(variable_feedback, is_displayr, mixed_message) {
	var structure_name = getVariableNaming(is_displayr);
	var transformation_name = is_displayr ? "transformation" : "QScript";
	var trailing_text = (mixed_message === undefined || mixed_message === null) ? "." : mixed_message;
	var first_var = variable_feedback[0];
	var remaining_vars = variable_feedback.filter(x => x !== first_var).filter(onlyUnique);
	// List every distinct type as its own item so they are separated properly.
	var distinct_types = [first_var].concat(remaining_vars);
	var listed_types = distinct_types.length > 1 ? printTypes(distinct_types, " and ") : String(first_var);
	log("The selected " + structure_name + " include " + listed_types +
		 ". These cannot be combined into a rank transform output " + structure_name.slice(0, -1) + " with this " + transformation_name + trailing_text);
}

// Returns the plural term used in user messages for the selected items:
// "variable sets" when is_displayr is truthy, "questions" otherwise.
// Declared as a function (rather than assigned to an undeclared name) so it
// is hoisted and does not rely on implicit global creation.
function getVariableNaming(is_displayr) {
	return is_displayr ? "variable sets" : "questions";
}

// Script entry point: run main() and report the outcome in the log.
if (main()) {
	conditionallyEmptyLog("QScript finished.");
} else {
	log("QScript cancelled.");
}

// Collects the questions / variable sets to rank, validates that they can be
// combined, and creates the rank-within-case output.
//
// In Q the user is asked for a data file, an input type and the questions to
// rank. In Displayr the current selection is used and must consist only of
// supported, non-banner variable sets from a single dataset.
//
// Returns true when the rank question was created, and false when the script
// stopped early or the rank computation failed.
function main() {
	// Check datafile exists
	if (!requireDataFile()) {
		return false;
	}
	var is_displayr = (!!Q.isOnTheWeb && Q.isOnTheWeb());
	// Must be computed after is_displayr so the wording matches the product.
	var structure_name = getVariableNaming(is_displayr);
	// Text appended to the mixed-type feedback; Displayr adds extra guidance below.
	var mixed_message = ".";
	var data_file;
	var allowed_types;
	var selected_questions;
	// If Q, get the user to select one data file when there is more than one.
	if (!is_displayr) {
		if (fileFormatVersion() < 13.05) {
			log("This QScript is not supported in this version of Q. Please use release version 5.4.1.0 or later to use this QScript.");
			return false;
		}
		data_file = requestOneDataFileFromProject();
		var user_input = ["Categorical", "Number"];
		var user_specified_type = selectOne('Select which input variable types you wish to rank', user_input);
		allowed_types = user_specified_type === 0 ? ["Pick One", "Pick One - Multi"] : ["Number", "Number - Multi", "Number - Grid"];
		var candidate_questions = getAllQuestionsByTypes([data_file], allowed_types);
		if (candidate_questions.length === 0) {
			log("No " + user_input[user_specified_type] + " variable types found in the data file.");
			return false;
		}
		selected_questions = selectManyQuestions("Select questions to rank:", candidate_questions, true).questions;
	} else {
		allowed_types = ["Nominal - Multi", "Numeric - Multi", "Numeric - Grid", "Ordinal - Multi", "Nominal", "Numeric", "Ordinal"];
		selected_questions = project.report.selectedQuestions();
		// Check if user hasn't selected anything
		if (selected_questions.length === 0) {
			log("To rank the variable set responses, you must select at least one variable set from a dataset with one of the following structures: "
				+ printTypes(allowed_types) + ". The selected variables should have the same structure.");
			return false;
		}
		var sorted_selection = splitArrayIntoApplicableAndNotApplicable(selected_questions, function (q) { return allowed_types.indexOf(q.variableSetStructure) !== -1 && !q.isBanner; });
		selected_questions = sorted_selection.applicable;
		mixed_message = ". The selected variable sets should also have the same structure. E.g. all Numeric variables or all Categorical " +
				"(mixing Ordinal and Nominal categorical variables is permissible for this transform so long as the label structure is the same).";
		if (sorted_selection.notApplicable.length !== 0) {
			log("The selected variable sets must all be of type " + printTypes(allowed_types) + mixed_message);
			return false;
		}
		data_file = selected_questions[0].dataFile;
		// Make sure all questions are from the same data set
		if (!selected_questions.map(function (q) { return q.dataFile.name; }).every(function (type) { return type == data_file.name; })) {
			log("Variable sets are from different datasets and cannot be combined. Please select variable sets from a single dataset.");
			return false;
		}
	}
	// Grab all base variables from all selected items
	var all_variables = getVariablesFromQuestions(selected_questions);
	var variable_set_structures = selected_questions.map(x => x.variableSetStructure);
	var variable_feedback = is_displayr ? variable_set_structures : selected_questions.map(x => x.questionType);

	// If only one question selected, or everything is Numeric, rank straight away.
	if (selected_questions.length === 1 || variable_set_structures.every(x => /Numeric/.test(x))) {
		// rankWithinCase returns false when the R question could not be created.
		return rankWithinCase(data_file, all_variables, is_displayr, selected_questions) !== false;
	}
	// Categorical inputs can only be combined when their labels line up.
	if (variable_set_structures.every(x => /(Nominal|Ordinal)/.test(x))) {
		if (checkStructureAndLabels(selected_questions, structure_name, is_displayr) !== true) {
			return false;
		}
		return rankWithinCase(data_file, all_variables, is_displayr, selected_questions) !== false;
	}
	differentTypeFeedback(variable_feedback, is_displayr, mixed_message);
	return false;
}


See also