Create New Variables - Midpoint Coding and Quantification

From Q
Jump to: navigation, search

This QScript automatically recodes the labels of categorical variables into new numeric variables, quantifying the information contained within the labels of input variable(s).

To apply the same recoding without generating new variables use the QScript Recoding - Midpoint Coding and Quantification instead.

Example

Midpoint.png

Technical details

This QScript looks for Pick One, Pick One - Multi, Number, and Number - Multi questions that have numbers or ranges of numbers in their labels and:

  • Attempts to set values for the question that correspond to the numbers in the labels for each category, or midpoints thereof.
  • Creates a new Number or Number - Multi question for each.

Where labels contain a single number, this value will be used. If no number is detected in the label, then the value of NaN will be assigned. Recoding will only be applied for questions that have three or more labels containing numbers.

Where the label contains a range of numbers, for example 18 to 24, the midpoint value will be used (21 in this case). If a question is recoded according to midpoints and it contains a lower label like Less than 18, then the midpoint will be halfway between zero and the number in the label (9 in this example). When the question is recoded according to midpoints and it contains an upper label like 55 or more, then the midpoint will be the number in the label plus half of the previous interval - so if the previous interval was 50 to 54, this midpoint will be set to 57. If no midpoint for a label can be determined then a value of NaN will be assigned.

If the labels include any kind of brackets, e.g. [ or (, then only the text inside the brackets will be used. If there is no closing bracket (the label has been truncated) then everything after the opening bracket will be used.

Labels that contain references to time periods, such as days, weeks, minutes, and hours, or other units like litres or kilograms are difficult to recode in this way. Any such questions will be added to a separate folder in your report.

How to apply this rule

For the first time in a project

  • Select the table(s)/chart(s) that you wish to apply the rule to.
  • Start typing the name of the Rule into the Search features and data box in the top right of the Q window.
  • Click on the Rule when it appears in the QScripts and Rules section of the search results.

OR

  • Select Automate > Browse Online Library.
  • Choose this rule from the list.

Additional applications of the rule

  • Select a table or chart that has the rule and any table(s)/chart(s) that you wish to apply the rule to.
  • Click on the Rules tab (bottom-left of the table/chart).
  • Select the rule that you wish to apply.
  • Click on the Apply drop-down and choose your desired option.
  • Check New items to have it automatically applied to new items that you create. Use Edit > Project Options > Save as Template to create a new project template that automatically uses this rule.

Removing the rule

  • Select the table(s)/chart(s) that you wish to remove the rule from.
  • Press the Rules tab (bottom-left of the table/chart).
  • Press Apply next to the rule you wish to remove and choose the appropriate option.

How to modify the rule

  • Click on the Rules tab (bottom-left of the table/chart).
  • Select the rule that you wish to modify.
  • Click Edit Rule and make the desired changes. Alternatively, you can use the JavaScript below to make your own rule (see Customizing Rules).

JavaScript

includeWeb('QScript R Output Functions');
includeWeb('QScript Selection Functions');
includeWeb('QScript Functions to Generate Outputs');
includeWeb("JavaScript Utilities");
includeWeb("JavaScript Array Functions");
includeWeb('QScript Questionnaire Functions');
includeWeb("QScript Table Functions");






// Script entry point: run the recoding with creation of new numeric variables
// switched on, then report whether the script completed or was cancelled.
if (midpointAndQuantificationRecodingCustom(true)) {
    conditionallyEmptyLog("QScript finished.");
} else {
    log("QScript cancelled.");
}

// Recodes questions whose value labels contain numbers, then adds summary
// tables for the recoded questions to a new "Recoded Variables" report group.
//
// Each selected question is recoded with midpoints when at least three
// midpoints can be computed from its labels; otherwise each value is set to
// the number found in its label. Questions whose labels mention time periods,
// rates, or units are filed separately as possibly incorrect.
//
// Parameters:
//   add_new_vars - when truthy, a duplicate of every recoded question is
//                  created and converted to Number / Number - Multi.
//
// Returns false when no candidate questions were found or the user selected
// none (desktop mode only); returns true otherwise.
function midpointAndQuantificationRecodingCustom(add_new_vars) {

    // On the web just take from what is selected.
    var web_mode = (!!Q.isOnTheWeb && Q.isOnTheWeb());

    includeWeb('QScript Utility Functions');
    includeWeb('QScript Selection Functions');
    includeWeb('QScript Value Attributes Functions');
    includeWeb('QScript Functions to Generate Outputs');
    includeWeb('QScript Data Reduction Functions');
    includeWeb('QScript Functions for Combining Categories');
    includeWeb('QScript Functions for Processing Arrays');

    // True when more than max_number of the labels mention a unit of measurement.
    function labelsReferToUnits(label_array, max_number) {
        return label_array.filter(labelRefersToUnits).length > max_number;
    }

    function labelRefersToUnits(label) {
        return containsSubstring(label, ["metres", "litres", "grams", "pound", "stone"]);
    }

    // True when more than max_number of the labels mention a rate ("per", "each", "every").
    function labelsReferToRates(label_array, max_number) {
        return label_array.filter(labelRefersToRates).length > max_number;
    }

    function labelRefersToRates(label) {
        return containsSubstring(label, ["per", "each", "every"]);
    }


    // Recode the question as appropriate.
    // Return an object containing:
    // - question - the original question
    // - incorrect - if we think the recoding is likely to be incorrect because the labels are ambiguous
    // - midpoints - flag is true if we used midpoint coding
    // - quantified - flag is true if we did not use midpoint coding but instead quantified the labels
    function recodeQuestionByMidpointsAndQuantification(question) {

        var coded_by_midpoints = false;
        var coded_by_quantification = false;

        var labels = valueLabels(question);
        var current_values = question.uniqueValues;

        // The ambiguity check depends only on the labels, so it is the same
        // whichever recoding strategy ends up being used.
        var could_be_incorrect = labelsReferToTime(labels, 1)
            || labelsReferToRates(labels, 1)
            || labelsReferToUnits(labels, 1);

        var midpoints = computeMidpointsInQuestionCustom(question, [" to ", "-"]);
        if (numberOfMidpoints(midpoints) >= 3) {
            // Try to recode with midpoints first
            coded_by_midpoints = true;

            current_values.forEach(function (value) {
                setValueForVariablesInQuestion(question, value, getInferredValueFromMidpoints(midpoints, value));
            });
        } else {
            // Code by quantification

            // If more than half of the labels contain strings in brackets eg (...)
            // then grab the string from inside the brackets rather than looking at
            // the full labels.
            var labels_in_brackets = extractStringsFromBrackets(labels);
            var num_in_brackets = labels_in_brackets.filter(function (label) { return label.length > 0; }).length;
            var quantified_labels;
            if (num_in_brackets > labels.length / 2)
                quantified_labels = labels_in_brackets.map(quantify);
            else
                quantified_labels = labels.map(quantify);

            coded_by_quantification = true;

            for (var k = 0; k < current_values.length; k++)
                setValueForVariablesInQuestion(question, current_values[k], quantified_labels[k]);
        }

        return { question: question, incorrect: could_be_incorrect, midpoints: coded_by_midpoints, quantified: coded_by_quantification };
    }


    // Make a copy of the question in obj, convert it to numeric, and relabel
    // it depending on how it has been recoded in the last step.
    function copyAndConvertToNumeric(obj) {
        var question = obj.question;
        var extra_label = obj.midpoints ? " - RECODED WITH MID-POINTS" : " - RECODED WITH NUMBERS FROM LABELS";
        var new_q = question.duplicate(preventDuplicateQuestionName(question.dataFile, question.name + extra_label));
        // Questions that are already numeric keep their type.
        if (question.questionType.indexOf("Number") == -1)
            new_q.questionType = question.questionType == "Pick One" ? "Number" : "Number - Multi";
        return { question: obj.question, new_question: new_q, incorrect: obj.incorrect, midpoints: obj.midpoints, quantified: obj.quantified };
    }

    // For Number and Number - Multi, do the values match the labels?
    // Returns false as soon as a numeric value differs from its label text.
    function valuesAndLabelsMatch(q) {
        var value_attributes = q.valueAttributes;
        var unique_values = q.uniqueValues;
        for (var j = 0; j < unique_values.length; j++) {
            var val = value_attributes.getValue(unique_values[j]);
            if (!isNaN(val) && val.toString() != value_attributes.getLabel(unique_values[j])) {
                return false;
            }
        }
        return true;
    }

    var selected_questions;
    if (!web_mode) {
        // Ask the user to choose which data files to use
        var selected_datafiles = dataFileSelection();

        // Figure out which questions look like they contain numbers in at least 3 of their labels
        var candidate_questions = getAllQuestionsByTypes(selected_datafiles, ["Pick One", "Pick One - Multi"]);
        var questions_with_numbers = candidate_questions.filter(function (q) {
            return valueLabelsContainNumbers(q, 3);
        });

        // Numeric questions only qualify when their values do not already match their labels.
        var numeric_questions = getAllQuestionsByTypes(selected_datafiles, ["Number", "Number - Multi"]);
        questions_with_numbers = questions_with_numbers.concat(numeric_questions.filter(function (q) {
            return valueLabelsContainNumbers(q, 3) && !valuesAndLabelsMatch(q);
        }));

        if (questions_with_numbers.length == 0) {
            log("No questions containing numbers in their labels have been found.");
            return false;
        }

        selected_questions = selectManyQuestions("The following questions appear to have numbers in their labels.\r\n"
                + "Please select the ones that you want to recode using the numbers from the labels, or midpoints where appropriate:", questions_with_numbers).questions;

        if (selected_questions.length == 0) {
            log("No questions selected.");
            return false;
        }
    } else {
        // Keep only the selected questions of a supported type that are not banners.
        var allowed_types = ["Pick One", "Pick One - Multi", "Number", "Number - Multi"];
        selected_questions = project.report.selectedQuestions();
        var sorted_selection = splitArrayIntoApplicableAndNotApplicable(selected_questions, function (q) { return allowed_types.indexOf(q.questionType) != -1 && !q.isBanner; });
        selected_questions = sorted_selection.applicable;
    }

    // First recode the original questions
    var recoding_objects = selected_questions.map(recodeQuestionByMidpointsAndQuantification);

    // Next create numeric copies of the questions if needed
    if (add_new_vars)
        recoding_objects = recoding_objects.map(copyAndConvertToNumeric);

    // Sort the recoded questions and new questions into arrays
    // based on how they've been recoded.
    var midpoint_questions = [];
    var quantified_questions = [];
    var incorrect_questions = [];

    // Adds the original question, and its numeric copy when one was made, to target.
    function collectQuestions(target, obj) {
        target.push(obj.question);
        if (add_new_vars)
            target.push(obj.new_question);
    }

    recoding_objects.forEach(function (obj) {
        if (obj.incorrect)
            collectQuestions(incorrect_questions, obj);
        else if (obj.midpoints)
            collectQuestions(midpoint_questions, obj);
        else
            collectQuestions(quantified_questions, obj);
    });

    var log_messages = [];

    if (quantified_questions.length + midpoint_questions.length + incorrect_questions.length > 0) {
        var recoded_group = project.report.appendGroup();
        recoded_group.name = "Recoded Variables";
        if (quantified_questions.length > 0) {
            var quant_group_name = "Questions recoded with numbers from labels";
            generateSubgroupOfSummaryTablesCustom(quant_group_name, recoded_group, quantified_questions);
            log_messages.push("New questions that have been recoded using numbers from the labels have been added to the folder: '" + quant_group_name + "'.");
        }
        if (midpoint_questions.length > 0) {
            var midpoint_group_name = "Questions recoded with midpoints";
            generateSubgroupOfSummaryTablesCustom(midpoint_group_name, recoded_group, midpoint_questions);
            log_messages.push("New questions that have been recoded using midpoints have been added to the folder: '" + midpoint_group_name + "'.");
        }

        if (incorrect_questions.length > 0) {
            var temporal_group = "Possibly incorrectly-recoded variables";
            generateSubgroupOfSummaryTablesCustom(temporal_group, recoded_group, incorrect_questions);
            log_messages.push("Recoded questions whose labels refer to time periods and other units have been added to the folder: '" + temporal_group + "'. These recodings should be checked carefully.");
        }

        // At least one of the three arrays is non-empty here (see the enclosing
        // condition), so the reminder to check the values always applies.
        log_messages.push("Please check the Values of the new questions to ensure that you are happy with the computed values. This is done by right-clicking on the table and selecting Values.");

        if (!web_mode) {
            conditionallyEmptyLog(log_messages.join("\r\n"));
            simpleHTMLReport(log_messages, "Recoded Variables", recoded_group, true);
        } else {
            var web_message = "Tables showing the recoded variables have been added to the bottom of the document.";
            if (incorrect_questions.length > 0)
                web_message = web_message + "\r\nSome variables have labels which include time periods and other units of measurement, and as a result the coding may not be correct. The coding for these variables should be checked carefully.";
            log(web_message);
        }

    }
    return true;
}
// // Create page and title
//     const pageName = "Most Significant Results"
//     const page = project.report.appendPage('TitleOnly');
//     page.group.moveAfter(page, null);
//     page.name = pageName;
//     var titleText = page.subItems[0];
//     titleText.text = pageName;
    
//     var results_item = page.appendR(r_expression);
//     log("A table showing the most significant results has been added to the top of your Document.");
// }

// Infers a numeric value for each value label of a question.
// Each label is split by the supplied array of delimiters and converted to
// numbers. An open-ended lower label ("under 18") is given half of its number,
// an open-ended upper label ("55 or more") is given its number plus half the
// width of the most recent two-number range, and every other label is passed
// to computeMidpoint. Labels for which no number results are left out.
//
// Returns an array of { sourceValue, inferredValue, genuineMidPoint } objects,
// where genuineMidPoint is true when the label split into exactly two parts.
function computeMidpointsInQuestionCustom(question, delimiters) {
    var results = [];
    var value_count = question.uniqueValues.length;
    var previous_width = 0;
    var raw_values = question.uniqueValues;
    var label_texts = valueLabels(question);

    // When most labels carry bracketed text, e.g. (...), work from the text
    // inside the brackets instead of the complete labels.
    var bracket_texts = extractStringsFromBrackets(label_texts);
    var non_empty_bracket_count = bracket_texts.filter(function (text) { return text.length > 0; }).length;
    if (non_empty_bracket_count > label_texts.length / 2)
        label_texts = bracket_texts;

    label_texts = label_texts.map(function (text) { return text.toLowerCase(); });

    // Lower limit rule: an "under"-style label holding exactly one number that
    // sits in the first two positions or directly follows a non-numeric label.
    function isOpenLowerBound(text, numbers, position) {
        if (!containsSubstring(text.toLowerCase(), ["under", "less", "lower", "up to", "below"]))
            return false;
        if (isNaN(quantify(text)))
            return false;
        if (numbers.filter(function (x) { return !isNaN(x); }).length != 1)
            return false;
        return position == 0 || position == 1 || isNaN(quantify(label_texts[position - 1]));
    }

    // Upper limit rule: an "over"-style label holding a number that sits in
    // the last two positions or directly precedes a non-numeric label.
    function isOpenUpperBound(text, position) {
        if (!containsSubstring(text.toLowerCase(), ["over", "more", "greater", "\+", "plus", "above"]))
            return false;
        if (isNaN(quantify(text)))
            return false;
        return position == value_count - 1 || position == value_count - 2 || isNaN(quantify(label_texts[position + 1]));
    }

    for (var position = 0; position < value_count; position++) {
        var text = label_texts[position];
        var numbers = quantifyArray(labelAsArray(text, delimiters, 2));

        var inferred;
        if (isOpenLowerBound(text, numbers, position)) {
            inferred = quantify(text) / 2.0;
        } else if (isOpenUpperBound(text, position)) {
            inferred = quantify(text) + previous_width / 2.0;
        } else {
            inferred = computeMidpoint(numbers);
        }

        if (!isNumber(inferred))
            continue;

        var is_pair = numbers.length == 2;
        results.push({
            sourceValue: raw_values[position],
            inferredValue: inferred,
            genuineMidPoint: is_pair
        });
        // Remember the width of this range for a later open-ended upper label.
        if (is_pair)
            previous_width = Math.abs(numbers[1] - numbers[0]);
    }
    return results;
}

// function generateSubgroupOfSummaryTablesCustom(group_name, within_group, question_array) {
//     var num_questions = question_array.length;
//     var new_group = within_group.appendGroup();
//     new_group.name = group_name;
//     var new_table;
//     for (var j = 0; j < num_questions; j++) {
//         new_table = new_group.appendTable();
//         new_table.primary = question_array[j];
//         var cur_below = new_table.columnStatistics;
//         cur_below.push("Average");
//         new_table.columnStatistics = cur_below;
//     }
//     return new_group;
// }


// Appends a new group named group_name to within_group and adds one summary
// table per question in question_array to it.
// When Q.isOnTheWeb() reports true, each table is placed on its own
// 'TitleOnly' page named after the question, followed by a text item
// explaining how to check the coding, and "Average" is added to the table's
// column statistics. Otherwise the table is appended directly to the group.
// Returns the newly created group.
function generateSubgroupOfSummaryTablesCustom(group_name, within_group, question_array) {
    var on_web = (!!Q.isOnTheWeb && Q.isOnTheWeb());
    var question_count = question_array.length;
    var subgroup = within_group.appendGroup();
    subgroup.name = group_name;

    // Desktop: a bare table directly inside the new group.
    function addTableToGroup(question) {
        var table = subgroup.appendTable();
        table.primary = question;
    }

    // Web: a titled page holding the table plus an explanatory note below it.
    function addTablePage(question) {
        var page = subgroup.appendPage('TitleOnly');
        var title = question.name;
        var title_item = page.subItems[0];
        title_item.text = title;
        page.name = title;

        var table = page.appendTable();
        table.primary = question;

        var note = page.appendText();
        note.text = "To check the coding, select the variable under Data Sets, and then click Properties > GENERAL > DATA VALUES > Values in the settings to the right.";
        note.top = table.top + table.height + 25;

        var statistics = table.columnStatistics;
        statistics.push("Average");
        table.columnStatistics = statistics;
    }

    var addQuestion = on_web ? addTablePage : addTableToGroup;
    for (var index = 0; index < question_count; index++)
        addQuestion(question_array[index]);

    return subgroup;
}


See also