Create New Variables - Rank Within Variable

From Q
Jump to: navigation, search

This QScript transformation creates an R variable that shows the ranks of the responses within each variable.

Example

Consider the readership dataset below

Ranking within variable will apply the rank transformation for each column in the data to yield the following

Technical details

Ranking is a statistical transformation that can be used on Numeric and Ordinal data. It replaces the source values in the data with their rank after the values have been sorted. See, for example, the Wikipedia page for information on a rank transformation. To use this transformation, the user selects one or more questions that are Number/Numeric or Categorical variable types. For Number/Numeric variables, the ranking is determined with the smallest value in the variable taking the rank of 1, the second smallest taking the rank of 2, and so on until all the values have been ranked. If there are any ties in the data, then they share the average rank. For Categorical variables, a similar procedure applies, except that the categorical value attribute labels are mapped to their source numeric values to determine the ranking.

How to apply this QScript

  • Start typing the name of the QScript into the Search features and data box in the top right of the Q window.
  • Click on the QScript when it appears in the QScripts and Rules section of the search results.

OR

  • Select Automate > Browse Online Library.
  • Select this QScript from the list.

Customizing the QScript

This QScript is written in JavaScript and can be customized by copying and modifying the JavaScript.

Customizing QScripts in Q4.11 and more recent versions

  • Start typing the name of the QScript into the Search features and data box in the top right of the Q window.
  • Hover your mouse over the QScript when it appears in the QScripts and Rules section of the search results.
  • Press Edit a Copy (bottom-left corner of the preview).
  • Modify the JavaScript (see QScripts for more detail on this).
  • Either:
    • Run the QScript, by pressing the blue triangle button.
    • Save the QScript and run it at a later time, using Automate > Run QScript (Macro) from File.

Customizing QScripts in older versions

  • Copy the JavaScript shown on this page.
  • Create a new text file, giving it a file extension of .QScript. See here for more information about how to do this.
  • Modify the JavaScript (see QScripts for more detail on this).
  • Run the file using Automate > Run QScript (Macro) from File.

JavaScript

includeWeb("QScript Utility Functions");
includeWeb("QScript Selection Functions");
includeWeb("QScript Functions to Generate Outputs");
includeWeb("QScript R Output Functions");

/**
 * Reports whether a variable name is ambiguous within the project.
 *
 * Every variable of every data file in the global `project` is compared
 * against `variable_name`, by both its name and its label.
 *
 * @param {string} variable_name - The name to look for.
 * @returns {boolean} `false` when exactly one variable matches by name or
 *     label; `true` otherwise (including when no variable matches at all).
 */
function checkDuplicateVariable(variable_name) {
	const isMatch = (candidate) =>
		candidate.name === variable_name || candidate.label === variable_name;
	const match_count = project.dataFiles
		.flatMap((data_file) => data_file.variables)
		.filter(isMatch)
		.length;
	return match_count !== 1;
}

/**
 * Predicate for `Array.prototype.filter` that keeps only the first
 * occurrence of each value.
 *
 * @param {*} value - The element being tested.
 * @param {number} index - The position of that element.
 * @param {Array} self - The array being filtered.
 * @returns {boolean} `true` when `index` is the first position at which
 *     `value` is found by `indexOf` (strict equality).
 */
function onlyUnique(value, index, self) {
	const first_occurrence = self.indexOf(value);
	return index === first_occurrence;
}

/**
 * Returns the plural noun used in user-facing messages for the selected
 * data structures.
 *
 * Declared with `var` rather than assigned to an undeclared identifier, so
 * the script no longer relies on implicit global creation (which throws a
 * ReferenceError in strict mode). The assignment still happens at this
 * point in the script.
 *
 * @param {boolean} is_displayr - Truthy to get the "variable sets" wording.
 * @returns {string} "variable sets" when `is_displayr` is truthy, otherwise
 *     "questions".
 */
var getVariableNaming = function (is_displayr) {
	return is_displayr ? "variable sets" : "questions";
};
	
/**
 * Creates a new R question holding the within-variable ranks of the
 * supplied variables.
 *
 * A single variable is ranked with R's `rank`; several variables are
 * combined into a data frame and ranked column by column with `apply`.
 * Both forms pass `na.last = "keep"` and `ties.method = "average"` to
 * `rank`. The new variables are labelled after the source variables. When
 * there is a single source variable or a single source question, the
 * question type is carried over if it is a Number type, or set to
 * "Number - Multi" if the source type ends in "Multi".
 *
 * When `is_displayr` is falsy the user is prompted for a name and a group
 * of summary tables is generated for the new question.
 *
 * @param {Object} data_file - Data file that receives the new question;
 *     its `newRQuestion` method and `name` are used.
 * @param {Array} variables - Source variables; each needs `name`, `label`
 *     and `question.name`.
 * @param {boolean} is_displayr - Truthy when running in Displayr.
 * @param {Array} questions - The questions the variables came from; the
 *     `questionType` of the first one is read.
 * @returns {boolean} `true` when the ranked question was created, `false`
 *     when creating it failed (the error is logged).
 */
function rankWithinVariable(data_file, variables, is_displayr, questions) {
	const structure_name = getVariableNaming(is_displayr);
	const variable_names = variables.map(v => v.label);
	const question_name = variables.map(v => v.question.name).filter(onlyUnique).join(" + ");
	const new_question_name = preventDuplicateQuestionName(data_file, "rank within variable for " + question_name);
	const last_variable = getLastVariable(variables);
	// Reference to a variable that is safe to use inside the R expression:
	// ambiguous names are disambiguated, everything else is converted to an R name.
	const toRReference = v => {
		return checkDuplicateVariable(v.name) ? generateDisambiguatedVariableName(v) : stringToRName(v.name);
	};

	let new_var_name;
	let expression;
	if (variables.length === 1) {
		new_var_name = preventDuplicateVariableName(data_file, variables[0].name);
		expression = 'x <- ' + toRReference(variables[0]) + '\n' +
				 'rank(x, na.last = "keep", ties.method = "average")';
	} else {
		const name_prefix = new_question_name.replace(/[^a-zA-Z0-9_@\#\$\\]/g, '_').toLowerCase() + "_";
		new_var_name = randomVariableName(16, name_prefix);
		// One "label = reference" data frame column per source variable.
		// Built with map so that no loop counter leaks into the global scope.
		const columns = variables.map((v, index) => {
			return stringToRName(variable_names[index]) + " = " + toRReference(v);
		});
		const def_prefix = 'x <- data.frame(';
		// Align continuation lines under the first data.frame argument.
		const white_spaces = " ".repeat(def_prefix.length);
		expression = def_prefix + columns.join(",\n" + white_spaces) + ')\n' +
				 'apply(x, MARGIN = 2, FUN = rank, na.last = "keep", ties.method = "average")\n';
	}

	let new_r_question;
	try {
		new_r_question = data_file.newRQuestion(expression, new_question_name, new_var_name, last_variable);
		new_r_question.variables.forEach((v, index) => {
			v.label = variable_names[index];
		});
		if (variables.length === 1 || questions.length === 1) {
			const source_type = questions[0].questionType;
			if (/Number/.test(source_type)) {
				new_r_question.questionType = source_type;
			} else if (/Multi$/.test(source_type)) {
				new_r_question.questionType = "Number - Multi";
			}
		}
	} catch (e) {
		log("The rank transform could not be computed for this " + structure_name.slice(0, -1)  + " : " + e);
		return false;
	}

	if (!is_displayr) {
		const new_name = prompt("Enter a name for the new, rank within variable, question:", new_r_question.name);
		// Only rename when a different, non-blank name was supplied.
		if (new_name && new_name !== new_r_question.name) {
			new_r_question.name = new_name;
		}
		const top_group_name = "Ranked Within Variable Transformation";
		const new_group = generateGroupOfSummaryTables(top_group_name, [new_r_question]);
		// More recent Q versions can point the user to the new items.
		if (fileFormatVersion() > 8.65) {
			project.report.setSelectedRaw([new_group.subItems[0]]);
		} else {
			log("rank within variable, variable set named " + new_r_question.name + " has been added to the dataset " + data_file.name);
		}
	}
	return true;
}

/**
 * Formats a list of names as a readable enumeration, e.g. "a, b or c".
 *
 * All items but the last are joined with ", " and the last item is attached
 * with `conjunction`. A single item is returned on its own and an empty or
 * missing list gives an empty string. A plain string is treated as one
 * already-formatted item and returned unchanged.
 *
 * Declared with `var` rather than assigned to an undeclared identifier, so
 * the script no longer relies on implicit global creation (which throws a
 * ReferenceError in strict mode).
 *
 * @param {Array|string} x - The items to list.
 * @param {string} [conjunction=" or "] - Text placed before the last item;
 *     any falsy value selects the default.
 * @returns {string} The formatted enumeration.
 */
var printTypes = function (x, conjunction) {
	if (x === null || typeof x === "undefined") {
		return "";
	}
	const items = typeof x === "string" ? [x] : x;
	if (items.length === 0) {
		return "";
	}
	if (items.length === 1) {
		// Nothing to join: avoid a dangling conjunction such as " or a".
		return String(items[0]);
	}
	const joiner = conjunction || " or ";
	const leading_items = items.slice(0, items.length - 1);
	return leading_items.join(", ") + joiner + items[items.length - 1];
};


// Run the script and report the outcome: a truthy result from main() means
// the script completed, anything else is reported as cancelled.
if (main()) {
	conditionallyEmptyLog("QScript finished.");
} else {
	log("QScript cancelled.");
}

/**
 * Entry point: collects the questions / variable sets to rank, validates
 * them, and runs the rank-within-variable transformation.
 *
 * In Q the user picks a data file, an input type (Categorical or Number)
 * and the questions to rank. In Displayr the current report selection is
 * used and must contain only supported, non-banner variable sets from a
 * single dataset.
 *
 * @returns {boolean} `true` when the transformation was carried out,
 *     `false` when a check failed or the transformation reported failure
 *     (a message is logged in each case).
 */
function main() {
	// Check datafile exists
	if (!requireDataFile()) {
		return false;
	}
	const is_displayr = (!!Q.isOnTheWeb && Q.isOnTheWeb());
	// Formats type names for a log message. Only delegates to printTypes
	// when there is more than one name to join.
	const describeTypes = (type_names) => {
		return type_names.length > 1 ? printTypes(type_names, " and ") : String(type_names[0]);
	};

	let data_file;
	let allowed_types;
	let selected_questions;
	// If Q, get the user to select one data file when there is more than one.
	if (!is_displayr) {
		if (fileFormatVersion() < 13.05) {
			log("This QScript is not supported in this version of Q. Please use release version 5.4.1.0 or later to use this QScript.");
			return false;
		}
		data_file = requestOneDataFileFromProject();
		const user_input = ["Categorical", "Number"];
		const user_specified_type = selectOne('Select which input variable types you wish to rank', user_input);
		allowed_types = user_specified_type === 0 ? ["Pick One", "Pick One - Multi"] : ["Number", "Number - Multi", "Number - Grid"];
		const candidate_questions = getAllQuestionsByTypes([data_file], allowed_types);
		if (candidate_questions.length === 0) {
			log("No " + user_input[user_specified_type] + " variable types found in the data file.");
			return false;
		}
		selected_questions = selectManyQuestions("Select questions to rank:", candidate_questions, true).questions;
		// Nothing to rank if the selection came back empty.
		if (selected_questions.length === 0) {
			log("No questions were selected to rank.");
			return false;
		}
	} else {
		allowed_types = ["Nominal - Multi", "Numeric - Multi", "Numeric - Grid", "Ordinal - Multi", "Nominal", "Numeric", "Ordinal"];
		selected_questions = project.report.selectedQuestions();
		// Check if user hasn't selected anything
		if (selected_questions.length === 0) {
			log("To rank the variable set responses, you must select at least one variable set from a dataset with one of the following structures: "
				+ printTypes(allowed_types) + ". The selected variables should have the same structure.");
			return false;
		}
		const sorted_selection = splitArrayIntoApplicableAndNotApplicable(selected_questions, function (q) {
			return allowed_types.indexOf(q.variableSetStructure) !== -1 && !q.isBanner;
		});
		selected_questions = sorted_selection.applicable;

		if (sorted_selection.notApplicable.length !== 0) {
			// Report the structures of the rejected items, not the items themselves.
			// Banners are rejected regardless of structure, so name them as such.
			const rejected_types = sorted_selection.notApplicable
				.map(q => (q.isBanner ? "Banner" : q.variableSetStructure))
				.filter(onlyUnique);
			log("The selected variable sets must all be of type " + printTypes(allowed_types) +
				". However, the selected variable sets include type " + describeTypes(rejected_types) +
				". It is not possible to rank transform variable sets of this type.");
			return false;
		}
		data_file = selected_questions[0].dataFile;
		// Make sure all questions are from the same data set
		if (!selected_questions.every(q => q.dataFile.name === data_file.name)) {
			log("Variable sets are from different datasets and cannot be combined. Please select variable sets from a single dataset.");
			return false;
		}
	}

	// Grab all base variables from all selected items
	const all_variables = getVariablesFromQuestions(selected_questions);
	const variable_set_structures = selected_questions.map(q => q.variableSetStructure);
	// A single selection is always ranked; several selections must all have
	// a Numeric, Nominal or Ordinal structure.
	const is_rankable = selected_questions.length === 1 ||
		variable_set_structures.every(s => /(Numeric|Nominal|Ordinal)/.test(s));
	if (is_rankable) {
		// Only an explicit false from the transformation counts as failure.
		return rankWithinVariable(data_file, all_variables, is_displayr, selected_questions) !== false;
	}

	const variable_feedback = is_displayr ? variable_set_structures : selected_questions.map(q => q.questionType);
	const structure_name = getVariableNaming(is_displayr);
	const variable_types = describeTypes(variable_feedback.filter(onlyUnique));
	const num_type = is_displayr ? "Numeric" : "Number";
	log("The Rank Within Variable transformation can only be computed on " + num_type + " or Categorical data structures. " +
	    "You have selected " + structure_name + " with " + variable_types + " data structures.");
	return false;
}


See also