QScript Functions for Splitting Grids

From Q
Jump to navigation Jump to search
This page is currently under construction, or it refers to features which are under development and not yet available for use.
This page is under construction. Its contents are only visible to developers!
// Split each selected grid question into several new multi questions.
//
// For every grid the user selects, the variable labels are analysed to find
// a delimiter which separates the two parts of each label. One new question
// is then created for each distinct label on the chosen side of the
// delimiter, built from duplicates of the matching variables, and the new
// variables are moved to sit after the last variable of the original grid.
//
// rows: when true (the default) the grid is split using the row labels,
//       otherwise the column labels. The choice is flipped for a grid
//       whose data reduction is transposed.
//
// Returns false when nothing is selected or when none of the selected grids
// could be split; otherwise nothing is returned.
function splitGridToMultis(rows = true) {
    includeWeb("JavaScript Utilities");
    includeWeb("QScript Utility Functions");
    includeWeb("QScript Selection Functions");
    includeWeb("JavaScript Array Functions");
    const is_displayr = inDisplayr();

    // The second argument is passed positionally. JavaScript has no named
    // arguments, so writing "single = false" here would assign to an
    // implicit global variable called "single".
    const questions = selectInputQuestions(["Binary - Grid", "Numeric - Grid"], false);
    if (!questions)
        return false;

    let all_new_questions = [];

    questions.forEach(function (q) {
        const new_questions = [];
        // When the grid's data reduction is transposed, split along the
        // other dimension.
        const use_rows = q.dataReduction.transposed ? !rows : rows;

        const type = q.questionType;
        const grid_variables = q.variables;
        const labels = grid_variables.map(function (v) { return v.label; });
        const trimmed_labels = removeCommonPrefixAndSuffix(labels);

        // Keep only the candidate delimiters which split the labels into a
        // consistent grid structure.
        const delimiter_candidates = identifyCommonSubstringCandidatesInLabels(trimmed_labels)
            .filter(function (x) {
                return delimiterProducesConsistentPrefixes(grid_variables, trimmed_labels, x, use_rows);
            });

        if (delimiter_candidates.length === 0) {
            log("Cannot detect the grid structure in " + q.name
                + ". You may need to " + (is_displayr ? "Reset" : "Revert")
                + " the grid first.");
            // Returning from the forEach callback only skips this grid;
            // the remaining selected grids are still processed.
            return;
        }

        // Highest scoring delimiter first.
        delimiter_candidates.sort(function (a, b) {
            return scoreDelimiter(b) - scoreDelimiter(a);
        });

        const delimiter = delimiter_candidates[0];
        const labels_obj = getLabelsObject(grid_variables, trimmed_labels, delimiter);
        const unique_labels = getUniqueRowOrColumnLables(labels_obj, use_rows);
        const data_file = q.dataFile;

        unique_labels.forEach(function (label) {
            const matching_variables = labels_obj.filter(function (obj) {
                return (use_rows ? obj.row_label == label : obj.column_label == label);
            }).map(obj => obj.variable);

            // The original grid is left untouched; the new question is
            // built from duplicates of its variables.
            const new_variables = matching_variables.map(v => v.duplicate());
            const new_question = data_file.setQuestion(preventDuplicateQuestionName(data_file, label),
                                                       (type === "Number - Grid" ? "Number - Multi" : "Pick Any"),
                                                       new_variables);
            new_question.needsCheckValuesToCount = false;
            new_questions.push(new_question);
        });

        const all_new_vars = getVariablesFromQuestions(new_questions);
        const last_var = grid_variables[grid_variables.length - 1];
        data_file.moveAfter(all_new_vars, last_var);

        moveQuestionsToHoverButtonIfShown(new_questions);
        all_new_questions = all_new_questions.concat(new_questions);
    });

    // Nothing was created (every selected grid was skipped), so there is
    // nothing to add to the report or to select.
    if (all_new_questions.length === 0)
        return false;

    if (!is_displayr) {
        // Add one table per new question and select the first of them.
        const new_tables = all_new_questions.map(function (q) {
            const new_table = project.report.appendTable();
            new_table.primary = q;
            return new_table;
        });
        project.report.setSelectedRaw([new_tables[0]]);
    } else {
        project.report.setSelectedRaw(all_new_questions);
    }
}

// Tidy the labels by removing any text which is common
// to the beginning of all labels, or common to the ends.
//
// The common text is removed by position rather than with a regular
// expression, so labels containing characters which are special in a
// regular expression (for example "(", "[", "?", "." or "|") are
// treated literally.
//
// labels: array of label strings.
// Returns a new array of trimmed labels; the input array is not modified.
function removeCommonPrefixAndSuffix(labels) {
    let trimmed = labels;

    const longest_prefix = longestCommonPrefix(trimmed);
    if (longest_prefix.length > 0)
        trimmed = trimmed.map(function (str) { return str.slice(longest_prefix.length); });

    // The common suffix is the common prefix of the reversed labels
    const longest_suffix = longestCommonPrefix(trimmed.map(reverseString));
    if (longest_suffix.length > 0) {
        // Use an explicit end index; slice(0, -0) would return an empty string
        trimmed = trimmed.map(function (str) {
            return str.slice(0, str.length - longest_suffix.length);
        });
    }

    return trimmed;
}

// Reverse the order of the characters in a string.
// The string is split by code point rather than by UTF-16 code unit, so
// characters outside the Basic Multilingual Plane (e.g. emoji) are kept
// intact instead of having their surrogate halves swapped.
// Combining marks are still treated as separate characters.
function reverseString(str) {
    return [...str].reverse().join("");
}

// Search through all possible substrings of the first label which
// have length 2 or greater, and identify those which are common to all
// of the supplied labels.
// Avoid substrings at the start and end of the label, since we are looking
// for a substring which splits all of the labels into two pieces: a
// candidate is only kept when its first occurrence in every label has at
// least one character before it and at least one character after it.
//
// original_labels: array of label strings. It is not modified.
// Returns an array of candidate delimiter strings (possibly empty, and
// possibly containing repeats when a substring occurs more than once in
// the first label).
function identifyCommonSubstringCandidatesInLabels(original_labels) {
    // Nothing to search when there are no labels
    if (original_labels.length === 0)
        return [];

    // If any of the labels are too short to be split into
    // a delimiter, suffix, and prefix
    if (original_labels.some(x => x.length < 5))
        return [];

    // True when the first occurrence of the substring sits strictly inside
    // the label, leaving text on both sides of it.
    const splitsLabel = function (label, substring) {
        const position = label.indexOf(substring);
        return position > 0 && position + substring.length < label.length;
    };

    const first = original_labels[0];
    const candidates = [];

    for (let start = 1; start <= first.length - 3; start++) {
        // Stop one short of the end so the candidate never reaches the
        // final character of the first label.
        for (let end = start + 2; end < first.length; end++) {
            const current_substring = first.substring(start, end);
            if (original_labels.every(label => splitsLabel(label, current_substring)))
                candidates.push(current_substring);
        }
    }
    return candidates;
}

// Decide whether a delimiter splits the labels in a way that
// looks like a grid.
// The labels are split by the delimiter, and the distinct labels
// on the requested side (rows or columns, as given by use_rows)
// are counted. The split is treated as consistent when every
// distinct label occurs the same number of times, and that
// number is greater than one.
// For example, with the delimiter " - " these labels are
// consistent:
// * Column A - Row A
// * Column B - Row A
// * Column A - Row B
// * Column B - Row B
// whereas these are not:
// * Column A - Row A
// * Column B - Row A
// * Column A - Row B
// * Column B - Row B
// * Column C - Row A
// * Column D - Row A
// because Column C and Column D each appear once while
// Column A and Column B each appear twice.
function delimiterProducesConsistentPrefixes(variables, trimmed_labels, delimiter, use_rows = true) {
    const split_labels = getLabelsObject(variables, trimmed_labels, delimiter);
    const distinct_labels = getUniqueRowOrColumnLables(split_labels, use_rows);
    const tallies = countLabelOccurrences(split_labels, distinct_labels, use_rows);

    const uniform = allEqual(tallies);
    // A tally of one would create single-variable sets, which is prohibited
    return uniform && tallies[0] > 1;
}

// Score a candidate delimiter from 0 to 3, one point for
// each of the following which holds:
// * it begins with a whitespace character
// * it ends with a whitespace character
// * it contains a punctuation character
// The score lets the algorithm prefer genuine delimiters
// such as:
// " - "
// ": "
// " ("
// over short runs of letters which merely happen to be
// common to the labels, such as:
// "th", "an", "Co", etc.
function scoreDelimiter(delimiter) {
    const criteria = [
        /^\s/,
        /\s$/,
        /[.,\/#!$%\^&\*;:{}=\-_`~()]/
    ];
    // The score is the number of criteria which the delimiter meets
    return criteria.filter(pattern => pattern.test(delimiter)).length;
}

// Build the labels object: an array with one entry per variable,
// each entry holding the column label and row label obtained by
// splitting that variable's tidied label by the supplied delimiter,
// along with the variable itself.
// The text before the first delimiter is the column label and
// everything after it is the row label.
function getLabelsObject(variables, trimmed_labels, delimiter) {
    return variables.map((variable, index) => {
        const pieces = trimmed_labels[index].split(delimiter);
        const [column_label, ...remainder] = pieces;
        // When the delimiter occurs more than once, only the first
        // occurrence separates the column from the row, so the rest
        // is stitched back together.
        const row_label = pieces.length > 2 ? remainder.join(delimiter) : pieces[1];
        return { column_label, row_label, variable };
    });
}

// Collect the row labels (when use_rows is true) or the column
// labels (otherwise) from every entry of a labels object, and
// return them with duplicates removed.
function getUniqueRowOrColumnLables(labels_obj, use_rows = true) {
    const property = use_rows ? "row_label" : "column_label";
    const selected_labels = labels_obj.map(entry => entry[property]);
    return unique(selected_labels);
}

// For each label in unique_labels, count how many entries of
// labels_obj carry that label in the row position (when use_rows
// is truthy) or the column position (otherwise).
// The counts are returned in the same order as unique_labels.
function countLabelOccurrences(labels_obj, unique_labels, use_rows) {
    const labelOf = use_rows
        ? (entry => entry.row_label)
        : (entry => entry.column_label);

    return unique_labels.map(function (wanted) {
        return labels_obj.reduce(function (tally, entry) {
            return labelOf(entry) == wanted ? tally + 1 : tally;
        }, 0);
    });
}