Preliminary Project Setup - Suggest Better Question Names from Source Labels

From Q
Jump to navigation Jump to search

This tool tries to improve the names of the questions/variable sets in your data by searching through the text of the original labels in the data file. In some cases, the names supplied in the metadata of a data file contain less information than the label text, and this tool can find that information and use it to create better names for you.

When the tool runs you will be asked to specify the questions/variable sets that you want to try to obtain better names for. It will then give you a list of potential name improvements, and you can choose which to use. If the suggested names are not meaningful then it is likely that the labels in the file do not contain better information for naming the questions.

Usage

To use this tool in Q:
  1. Select Automate > Browse Online Library > Suggest Better Question Names from Source Labels
  2. Choose the question name you wish to attempt to improve
  3. Choose any improvements suggested from the list

To use this tool in Displayr:

  1. Under Data Sets, select one or more variable sets whose names you wish to try to improve
  2. Select + Anything > Data > Miscellaneous > Suggest Better Variable Set Names from Source Labels
  3. Choose any improvements suggested from the list

Example

In this example we have a question which has been called q10 in the raw data file, but we can see in the Source Label column that the original labels in the data file contain better information about what the question asked, namely the text Q10. Why drinks more than one cola. As this text is more informative, we can use this QScript to rename q10 as Q10. Why drinks more than one cola automatically.

LabelImprovementExample.PNG

Technical details

It is usually better to instruct your data provider to add appropriate question text to the Set Label of the Multiple Response Set in the SPSS file because Q uses this text as the Question Name. In some cases, for example in older data processing software like Quantum, it is not possible to change this, and so this QScript can be used to get a tidier layout in Q. If your data provider does have control over these aspects of the formatting in the SPSS file then you should ask them to use the data file specifications linked here: SPSS Data File Specifications.

This QScript searches the Source Label of each of the variables in the selected question to find text that is common at the start and the end of the label. If the labels have been truncated, which happens with some older data processing software, then this script will identify the longest common label suffix which shares text with the other label suffixes. That is, the script can still work even when the labels are truncated. In most cases, truncated labels in the data file will show up as messy labels in the Variables and Questions tab and on your tables.

How to apply this QScript

  • Start typing the name of the QScript into the Search features and data box in the top right of the Q window.
  • Click on the QScript when it appears in the QScripts and Rules section of the search results.

OR

  • Select Automate > Browse Online Library.
  • Select this QScript from the list.

Customizing the QScript

This QScript is written in JavaScript and can be customized by copying and modifying the JavaScript.

Customizing QScripts in Q4.11 and more recent versions

  • Start typing the name of the QScript into the Search features and data box in the top right of the Q window.
  • Hover your mouse over the QScript when it appears in the QScripts and Rules section of the search results.
  • Press Edit a Copy (bottom-left corner of the preview).
  • Modify the JavaScript (see QScripts for more detail on this).
  • Either:
    • Run the QScript, by pressing the blue triangle button.
    • Save the QScript and run it at a later time, using Automate > Run QScript (Macro) from File.

Customizing QScripts in older versions

  • Copy the JavaScript shown on this page.
  • Create a new text file, giving it a file extension of .QScript. See here for more information about how to do this.
  • Modify the JavaScript (see QScripts for more detail on this).
  • Run the file using Automate > Run QScript (Macro) from File.

JavaScript

// Better question names for questions with more than 1 variable
//
// Sometimes Multiple Response Sets are labeled poorly even when
// the variable labels contain decent question naming information.

// Helpers called further down (for example inDisplayr, selectManyQuestions
// and longestCommonLabelSuffix) are not defined in this script; presumably
// they are supplied by the library scripts included here - TODO confirm
// which library provides which helper.
includeWeb("QScript Selection Functions");
includeWeb("QScript Functions for Fixing Truncated Labels");
includeWeb("QScript Functions to Generate Outputs");
includeWeb('QScript Utility Functions');

// Entry point: run the tool as soon as the script is executed. The call can
// precede the definition because function declarations are hoisted.
suggestBetterQuestionNames()

/**
 * Proposes new names for questions (Q) or variable sets (Displayr) using text
 * shared by the source labels of their variables, asks the user which
 * proposals to accept, and renames the accepted ones.
 *
 * In Displayr the variable sets currently selected by the user are examined.
 * In Q the user picks from the non-hidden, non-banner questions that have
 * more than one variable, and a "Renamed Questions" report group containing a
 * summary text item and one table per renamed question is added.
 *
 * @returns {boolean} true when the user accepted at least one rename; false
 *     when nothing was selected, no improvement was found, or no proposed
 *     change was accepted.
 */
function suggestBetterQuestionNames() {
    const in_displayr = inDisplayr();
    const structure_name = in_displayr ? "variable set" : "question";

    const data_file = requestOneDataFileFromProject(false);
    let chosen_questions;

    if (in_displayr) {
        chosen_questions = getAllUserSelections().selected_questions;
        if (chosen_questions.length === 0) {
            log("Nothing is selected. Select one or more variable sets under Data Sets and run this option again.");
            return false;
        }
    } else {
        // Only visible, non-banner questions with several variables qualify.
        const candidates = data_file.questions.filter(
            (q) => !(q.isHidden || q.isBanner) && q.variables.length > 1
        );
        if (candidates.length === 0) {
            log(`No appropriate ${structure_name}s found.`);
            return false;
        }

        const prompt = `Select ${structure_name} names to try and improve:`;
        chosen_questions = selectManyQuestions(prompt, candidates, true).questions;
        if (chosen_questions.length === 0) {
            log(`No ${structure_name}s selected.`);
            return false;
        }
    }

    // Build one proposal per question whose shared label text is longer than
    // its current name.
    const proposals = [];
    const proposed_names = [];
    chosen_questions.forEach((q) => {
        const source_labels = q.variables.map((v) => v.sourceLabel);
        const common_prefix = longestCommonPrefix(source_labels);
        const common_suffix = longestCommonLabelSuffix(q, true);
        const suffix_wins = common_prefix.length < common_suffix.length;

        // Take the longer of the two pieces of shared text and strip leading
        // whitespace and punctuation from it.
        const candidate = (suffix_wins ? common_suffix : common_prefix)
            .replace(/^[\s:_\-.*+?^${}()\]\[@#;<>&]+/, "");

        if (candidate.length <= q.name.length)
            return;

        // Keep the proposal distinct from existing question names and from
        // the names already proposed in this run.
        const new_name = preventDuplicateString(
            proposed_names,
            preventDuplicateQuestionName(q.dataFile, candidate)
        );

        // When a suffix-derived name still appears inside a current variable
        // label, the labels may have been truncated without the extra text
        // being tidied away; record that so it can be cleaned up on rename.
        const labels = q.variables.map((v) => v.label);
        const could_be_truncated = suffix_wins
            ? labels.filter((label) => label.indexOf(candidate) > -1).length > 0
            : false;
        const truncated_text = could_be_truncated ? labelsAreTruncated(labels) : null;

        proposals.push({
            original: q.name,
            improved: new_name,
            question: q,
            could_be_truncated: could_be_truncated,
            truncated_text: truncated_text
        });
        proposed_names.push(new_name);
    });

    if (proposals.length === 0) {
        log("Did not find any name improvements.");
        return false;
    }

    // Ask the user which of the proposed renames should be applied.
    const changes_text = proposals.map((p) => p.original + "   --->   " + p.improved);
    const chosen_indices = selectMany("Select the name changes to make:", changes_text);
    const accepted = chosen_indices.map((j) => proposals[j]);

    if (accepted.length === 0) {
        log("No changes selected.");
        return false;
    }

    // In Q the results are reported in a new group; the text item is added
    // first so that it sits ahead of the tables appended afterwards.
    let new_group;
    let text_item;
    if (!in_displayr) {
        new_group = project.report.appendGroup();
        new_group.name = "Renamed Questions";
        text_item = new_group.appendText();
    }

    accepted.forEach((change) => {
        const question = change.question;
        question.name = preventDuplicateQuestionName(question.dataFile, change.improved);
        if (change.could_be_truncated)
            fixLabelTruncation(question, change.truncated_text);
        if (!in_displayr) {
            const table = new_group.appendTable();
            table.primary = question;
        }
    });

    if (in_displayr) {
        project.report.setSelectedRaw(chosen_questions);
        return true;
    }

    // Fill the summary text item with a title and an old-name/new-name table.
    const title_builder = Q.htmlBuilder();
    const text_builder = Q.htmlBuilder();
    title_builder.appendParagraph("Renamed Questions", { font: 'Tahoma', size: 20 });
    const summary_rows = accepted.map((change) => [change.original, change.improved]);
    text_builder.appendTable(summary_rows, [20, 60], null, { font: 'Lucida Console', size: 10 });
    text_item.title = title_builder;
    text_item.content = text_builder;
    return true;
}

/**
 * Returns a string based on new_string that is not already present in strings.
 *
 * new_string itself is returned when it is not in the array. Otherwise a space
 * and a counter starting at 1 are appended ("name 1", "name 2", ...) and the
 * first variant not found in the array is returned.
 *
 * @param {string[]} strings - strings that the result must not equal
 * @param {string} new_string - the preferred string
 * @returns {string} new_string, or new_string followed by " " and a number
 */
function preventDuplicateString(strings, new_string) {
    let candidate = new_string;
    // Each pass swaps in the next numbered variant until one is unused.
    for (let suffix = 1; strings.indexOf(candidate) !== -1; suffix++) {
        candidate = new_string + " " + suffix;
    }
    return candidate;
}

See also