Custom Data Files - Survey Gizmo MaxDiff

From Q
Jump to: navigation, search

This QScript makes it possible to analyse Survey Gizmo MaxDiff data in Q. Before running this script, Survey Gizmo MaxDiff and respondent data files first need to be added to the project. This script produces questions containing the best and worst choices and the respondent version in the respondent data set. An R output table containing the MaxDiff design is also produced by this QScript. These inputs can then be used to run a MaxDiff analysis using Latent Class Analysis or Hierarchical Bayes.

If run from Q, the user is prompted to select a MaxDiff data set imported from Survey Gizmo, and a respondent data set containing Response ID.

If run from Displayr, the MaxDiff variable set must be selected. If more than one other data set contains a Response ID, then the respondent data set must also be selected.

For the script to run, the attribute variables must not be grouped together. You can split the question in Q by right-clicking the variables in the Variables and Questions tab and selecting Revert to Source. In Displayr, you can do the same by first selecting the question under Data Sets and then pressing Split in the Data Manipulation ribbon.

How to apply this QScript

  • Start typing the name of the QScript into the Search features and data box in the top right of the Q window.
  • Click on the QScript when it appears in the QScripts and Rules section of the search results.

OR

  • Select Automate > Browse Online Library.
  • Select this QScript from the list.

Customizing the QScript

This QScript is written in JavaScript and can be customized by copying and modifying the JavaScript.

Customizing QScripts in Q4.11 and more recent versions

  • Start typing the name of the QScript into the Search features and data box in the top right of the Q window.
  • Hover your mouse over the QScript when it appears in the QScripts and Rules section of the search results.
  • Press Edit a Copy (bottom-left corner of the preview).
  • Modify the JavaScript (see QScripts for more detail on this).
  • Either:
    • Run the QScript, by pressing the blue triangle button.
    • Save the QScript and run it at a later time, using Automate > Run QScript (Macro) from File.

Customizing QScripts in older versions

  • Copy the JavaScript shown on this page.
  • Create a new text file, giving it a file extension of .QScript. See here for more information about how to do this.
  • Modify the JavaScript (see QScripts for more detail on this).
  • Run the file using Automate > Run QScript (Macro) from File.

JavaScript

includeWeb("QScript R Output Functions");

/**
 * Reports whether a data file contains a question with the given name.
 *
 * @param data_file Object exposing a `questions` list whose items have a `name`.
 * @param name      Question name to look for (compared with loose equality).
 * @returns {boolean} true if at least one question's name matches, else false.
 */
function dataFileHasQuestionWithName(data_file, name)
{
    var candidates = data_file.questions;
    var total = candidates.length;
    var found = false;
    var idx = 0;
    // Stop scanning as soon as a match is seen.
    while (!found && idx < total)
    {
        found = (candidates[idx].name == name);
        idx++;
    }
    return found;
}

/**
 * Looks up a question by name in a data file.
 *
 * @param data_file Object exposing a `questions` list whose items have a `name`.
 * @param name      Question name to look for (compared with loose equality).
 * @returns The first question whose name matches, or null if there is none.
 */
function getQuestionFromDataFile(data_file, name)
{
    var all_questions = data_file.questions;
    var total = all_questions.length;
    var match = null;
    // The loop condition ends the scan at the first match, so the earliest
    // question with this name is the one returned.
    for (var pos = 0; pos < total && match === null; pos++)
    {
        var candidate = all_questions[pos];
        if (candidate.name == name)
            match = candidate;
    }
    return match;
}

/**
 * Decides whether a data file can serve as the respondent data set.
 *
 * A respondent data set has a "Response ID" question and does NOT have the
 * full set of MaxDiff marker questions ("Combination", "Attribute 1", "Best"
 * and "Worst").
 *
 * @param data_file Data file to inspect.
 * @returns {boolean} true if the file qualifies as a respondent data set.
 */
function isRespondentDataFile(data_file)
{
    if (!dataFileHasQuestionWithName(data_file, "Response ID"))
        return false;

    // The file is excluded only when every MaxDiff marker question is present.
    var max_diff_markers = ["Combination", "Attribute 1", "Best", "Worst"];
    var has_all_markers = max_diff_markers.every(function (marker)
    {
        return dataFileHasQuestionWithName(data_file, marker);
    });
    return !has_all_markers;
}

/**
 * Decides whether a data file looks like a Survey Gizmo MaxDiff data set.
 *
 * The file must contain all of the questions "Response ID", "Combination",
 * "Attribute 1", "Best" and "Worst".
 *
 * @param data_file Data file to inspect.
 * @returns {boolean} true if every required question is present.
 */
function isMaxDiffDataFile(data_file)
{
    var required_questions = ["Response ID", "Combination", "Attribute 1", "Best", "Worst"];
    // every() stops at the first missing question, like a chain of &&.
    return required_questions.every(function (question_name)
    {
        return dataFileHasQuestionWithName(data_file, question_name);
    });
}

/**
 * Entry point of the QScript.
 *
 * Steps:
 *   1. Identify the MaxDiff data set and the respondent data set (from the
 *      current selection in Displayr, or by prompting the user in Q).
 *   2. Add an R question to the respondent data set holding the best and
 *      worst choices for each MaxDiff question.
 *   3. Add an R "Version" variable to the respondent data set.
 *   4. Append an R output containing the MaxDiff design to the report.
 *
 * Problems are reported through log() and the function returns early.
 */
function main()
{
    includeWeb('QScript Utility Functions');

    var data_files = project.dataFiles;

    if (data_files.length < 2)
    {
        log("There needs to be both a respondent data set and a MaxDiff data set in your project");
        return;
    }

    // Q.isOnTheWeb may not exist in every host, so test for it before calling.
    var is_displayr = (!!Q.isOnTheWeb && Q.isOnTheWeb());

    if (is_displayr)
    {
        var variables = project.report.selectedVariables();
        if (variables.length == 0)
        {
            log("A MaxDiff data set needs to be selected from 'Data Sets'.");
            return;
        }

        // Identify which data sets are selected
        var selected_data_file_names = new Set();
        var unique_data_files = [];
        var data_file;
        for (var i = 0; i < variables.length; i++)
        {
            data_file = variables[i].question.dataFile;
            if (!selected_data_file_names.has(data_file.name))
            {
                selected_data_file_names.add(data_file.name);
                unique_data_files.push(data_file);
            }
        }
        if (unique_data_files.length >= 3)
        {
            log("More than 2 data sets are selected. Select a MaxDiff data set and a respondent data set.");
            return;
        }

        var max_diff_data_file = null;
        var respondent_data_file = null;

        if (unique_data_files.length == 1)
        {
            // Only the MaxDiff data set is selected; the respondent data set
            // has to be discovered among the project's data sets.
            max_diff_data_file = unique_data_files[0];
            if (!isMaxDiffDataFile(max_diff_data_file))
            {
                log("A MaxDiff data set needs to be selected from 'Data Sets'.");
                return;
            }
            // Iterate over all data sets in project to identify respondent data set
            var respondent_data_file_index = -1;
            for (var i = 0; i < data_files.length; i++)
            {
                if (isRespondentDataFile(data_files[i]))
                {
                    // A second candidate makes the choice ambiguous.
                    if (respondent_data_file_index != -1)
                    {
                        log("Multiple respondent data sets have been identified. Please ensure only one data set (apart from the MaxDiff data set) contains a 'Response ID'.");
                        return;
                    }
                    respondent_data_file_index = i;
                }
            }
            if (respondent_data_file_index > -1)
                respondent_data_file = data_files[respondent_data_file_index];
            else
            {
                log("No suitable respondent data set was found.");
                return;
            }
        }
        else // 2 data files selected
        {
            // Work out which of the two selected files is the MaxDiff one;
            // the other is then treated as the respondent data set.
            var file1 = unique_data_files[0];
            var file2 = unique_data_files[1];
            if (isMaxDiffDataFile(file1))
            {
                max_diff_data_file = file1;
                respondent_data_file = file2;
            }
            else
            {
                if (!isMaxDiffDataFile(file2))
                {
                    log("A MaxDiff data set needs to be selected from 'Data Sets'.");
                    return;
                }
                max_diff_data_file = file2;
                respondent_data_file = file1;
            }
            if (!isRespondentDataFile(respondent_data_file))
            {
                log("2 data sets have been selected and a MaxDiff data set has been identified. The other data set is required to contain a 'Response ID' but does not.");
                return;
            }
        }
    }
    else // In Q, ask user to select data sets
    {
        includeWeb('QScript Selection Functions');
        var max_diff_data_file = selectOneDataFile('Select the Survey Gizmo MaxDiff data set:', project.dataFiles);
        if (!isMaxDiffDataFile(max_diff_data_file))
        {
            log("The selected data set is not a MaxDiff data set.");
            return;
        }

        var respondent_data_file = selectOneDataFile('Select the respondent data set:', project.dataFiles);
        if (!isRespondentDataFile(respondent_data_file))
        {
            log("The selected data set is not a respondent data set.");
            return;
        }
    }

    // Get the number of alternatives per question by probing for
    // "Attribute 1", "Attribute 2", ... until one is missing.
    var alternatives = [];
    var c = 1;
    while (true)
    {
        if (dataFileHasQuestionWithName(max_diff_data_file, "Attribute " + c))
        {
            alternatives[c - 1] = getQuestionFromDataFile(max_diff_data_file,
                                                          "Attribute " + c);
            c++;
        }
        else
            break;
    }
    var n_alternatives_per_question = c - 1;

    // Get the number of questions: the largest raw value of "Combination"
    var raw_values = getQuestionFromDataFile(max_diff_data_file, "Combination").variables[0].rawValues;
    var n_questions = 0;
    for (var i = 0; i < raw_values.length; i++)
        if (n_questions < raw_values[i])
            n_questions = raw_values[i];

    // Names with backticks escaped so that they can be referred to in R.
    // The 'g' flag is required: without it only the first backtick would be
    // escaped and a name containing several would break the R expressions.
    var max_diff_data_file_name = max_diff_data_file.name.replace(/`/g, "\\`");
    var respondent_data_file_name = respondent_data_file.name.replace(/`/g, "\\`");

    // Get comma-separated alternative question names
    var alt_question_names = "`" + max_diff_data_file_name + "`$Variables$Attribute1";
    for (var i = 1; i < n_alternatives_per_question; i++)
        alt_question_names = alt_question_names + ", `" + max_diff_data_file_name + "`$Variables$Attribute" + (i + 1);

    // Respondent and MaxDiff data set Response ID question
    var respondent_response_id = "`" + respondent_data_file_name + "`$Questions$`Response ID`";
    var maxdiff_response_id = "`" + max_diff_data_file_name + "`$Questions$`Response ID`";

    // Move best and worst selections to the respondent data set.
    // The R code builds one row per respondent with 2 * n.questions columns,
    // alternating best (odd columns) and worst (even columns) choices.
    var expr_bestworst = "n.questions <- " + n_questions + "\n" +
            "lvls <- sort(unique(as.vector(cbind(" + alt_question_names + "))))\n" +
            "id.respondent <- " + respondent_response_id + "\n" +
            "id.max.diff <- " + maxdiff_response_id + "\n" +
            "best.max.diff <- `" + max_diff_data_file_name + "`$Variables$Best\n" +
            "worst.max.diff <- `" + max_diff_data_file_name + "`$Variables$Worst\n" +
            "n.respondents <- length(id.respondent)\n" +
            "result <- data.frame(rep(list(factor(rep(NA, n.respondents), levels = lvls)), 2 * n.questions))\n" +
            "for (i in 1:n.respondents)\n" +
            "{\n" +
            "    ind <- which(id.max.diff == id.respondent[i])\n" +
            "    for (j in seq_along(ind))\n" +
            "    {\n" +
            "        best.val <- best.max.diff[ind[j]]\n" +
            "        worst.val <- worst.max.diff[ind[j]]\n" +
            "        if (best.val %in% lvls && worst.val %in% lvls)\n" +
            "        {\n" +
            "            result[i, 2 * j - 1] <- best.val\n" +
            "            result[i, 2 * j] <- worst.val\n" +
            "        }\n" +
            "    }\n" +
            "}\n" +
            "result";

    var best_worst_question_name = preventDuplicateQuestionName(respondent_data_file, "Choices from " + max_diff_data_file.name);
    var temp_var_name = randomVariableName(16); // temporary name, random to (almost) guarantee uniqueness

    try {
        var best_worst_question = respondent_data_file.newRQuestion(expr_bestworst,
                 best_worst_question_name, temp_var_name, null);
    } catch (e)
    {
        log("Could not create question for best and worst choices: " + e);
        return false;
    }

    // Replace the temporary variable names with MaxDiffBest<k> / MaxDiffWorst<k>
    // and give each variable a matching label.
    for (var i = 0; i < n_questions; i++)
    {
        best_worst_question.variables[2 * i].name = preventDuplicateVariableName(respondent_data_file, "MaxDiffBest" + (i + 1), "_");
        best_worst_question.variables[2 * i + 1].name = preventDuplicateVariableName(respondent_data_file, "MaxDiffWorst" + (i + 1), "_");
        best_worst_question.variables[2 * i].label = preventDuplicateQuestionName(respondent_data_file, "Best " + (i + 1));
        best_worst_question.variables[2 * i + 1].label = preventDuplicateQuestionName(respondent_data_file, "Worst " + (i + 1));
    }

    // Last variable of the new question; passed to newRVariable below when
    // creating the Version variable.
    var previous_variable = best_worst_question.variables[2 * n_questions - 1];

    // Create version variable: respondents found in the MaxDiff data set are
    // numbered 1, 2, ... in the order their IDs first appear there; all
    // other respondents get NA.
    var expr_version = "id.respondent <- " + respondent_response_id + "\n" +
        "id.max.diff <- unique(" + maxdiff_response_id + ")\n" +
        "result <- rep(NA, length(id.respondent))\n" +
        "c <- 1\n" +
        "for (i in 1:length(id.max.diff))\n" +
        "{\n" +
        "    ind <- which(id.respondent == id.max.diff[i])\n" +
        "    if (length(ind) == 1)\n" +
        "    {\n" +
        "        result[ind] <- c\n" +
        "        c <- c + 1\n" +
        "    }\n" +
        "    else if (length(ind) > 1)\n" +
        "        stop('Respondent IDs must be unique.')\n" +
        "}\n" +
        "result";
    var version_name = preventDuplicateVariableName(respondent_data_file, "Version");
    var version_label = preventDuplicateQuestionName(respondent_data_file, "Version from " + max_diff_data_file.name);
    try {
        respondent_data_file.newRVariable(expr_version, version_name, version_label, previous_variable);
    } catch (e) {
        log("Could not create Version variable: " + e);
        return false;
    }

    // Build an R object name from the data set name by collapsing every run
    // of non-alphanumeric characters into a single ".". Inside a character
    // class "(", "|" and ")" are literal, so they must not be listed there;
    // otherwise they would be kept and yield an invalid R name.
    var design_name = generateUniqueRObjectReferenceName(("design.for." + max_diff_data_file.name).replace(/[^0-9A-Za-z]+/g, "."));

    // Create design from MaxDiff data set. Rows whose Response ID is absent
    // from the respondent data set are dropped and reported in an R warning.
    var expr_design = "n.alternatives.per.question <- " + n_alternatives_per_question + "\n" +
        "alternatives <- cbind(" + alt_question_names + ")\n" +
        "lvls <- sort(unique(as.vector(alternatives)))\n" +
        "id.respondent <- " + respondent_response_id + "\n" +
        "id.max.diff <- " + maxdiff_response_id + "\n" +
        "ind <- id.max.diff %in% id.respondent\n" +
        "design.versions <- as.numeric(factor(id.max.diff[ind]))\n" +
        "design.tasks <- unlist(lapply(table(design.versions), seq_len))\n" +
        "n.excluded <- length(unique(id.max.diff[!ind]))\n" +
        "if (n.excluded > 0)\n" +
        "    warning('The following ', ngettext(n.excluded, 'ID has', 'IDs have'),\n" +
        "            ' been excluded as ', ngettext(n.excluded, 'it was', 'they were'),\n" +
        "            ' not found in the respondent data set: ',\n" +
        "            paste0(unique(id.max.diff[!ind]), collapse = ', '))\n" +
        "design <- matrix(NA, ncol = n.alternatives.per.question + 2, nrow = sum(ind))\n" +
        "design[, 1] <- design.versions\n" +
        "design[, 2] <- design.tasks\n" +
        "for (i in 1:n.alternatives.per.question)\n" +
        "    design[, i + 2] <- as.numeric(factor(alternatives[ind, i], levels = lvls))\n" +
        "colnames(design) <- c('Version', 'Task', paste0('Alt.', 1:n.alternatives.per.question))\n" +
        design_name + " <- design";

    if (is_displayr)
    {
        // In Displayr the design goes on its own new page (group).
        var design_page = project.report.appendGroup();
        design_page.name = "Design for " + max_diff_data_file.name;
        var design = design_page.appendR(expr_design);
    }
    else
    {
        // In Q the design is appended to the report and then selected.
        var design = project.report.appendR(expr_design);
        project.report.setSelectedRaw([design]);
    }
    log("Questions containing the best and worst choices and the respondent versions have been added to the respondent data set. In addition, a MaxDiff design has been added to the report.");
}

// Run the script.
main();

See also