QScript Functions for Splitting Grids
Jump to navigation
Jump to search
This page is currently under construction, or it refers to features which are under development and not yet available for use.
This page is under construction. Its contents are only visible to developers!
This page is currently under construction, or it refers to features which are under development and not yet available for use.
This page is under construction. Its contents are only visible to developers!
includeWeb("JavaScript Array Functions");
includeWeb("JavaScript Utilities");
includeWeb("QScript Selection Functions");
includeWeb("QScript Utility Functions");
// Split each selected grid question into a set of new multi questions,
// one for each unique row label (or column label when rows is false).
// The row/column choice is flipped for a grid whose data reduction is
// flagged as transposed.
//
// Each new question is built from duplicates of the grid's variables and
// is typed "Number - Multi" when the grid is a "Number - Grid", otherwise
// "Pick Any". The new variables are moved to sit after the last variable
// of the grid they came from.
//
// Grids whose label structure cannot be detected are reported with log()
// and skipped; the remaining grids are still processed.
//
// Outside Displayr a table is appended to the report for every new
// question and the first new table is selected. In Displayr the new
// questions themselves are selected.
//
// Returns false when no questions were selected, or when no new question
// could be created from any of the selected grids. Otherwise nothing is
// returned.
function splitGridToMultis(rows = true) {
    const is_displayr = inDisplayr();
    const questions = selectInputQuestions(["Binary - Grid", "Numeric - Grid"], false);
    if (!questions)
        return false;
    let all_new_questions = [];
    questions.forEach(function (q) {
        const new_questions = [];
        let use_rows = rows;
        const data_reduction = q.dataReduction;
        if (data_reduction.transposed)
            use_rows = !use_rows;
        const type = q.questionType;
        const split_labels = splitGridVariableLabels(q.variables, use_rows);
        if (split_labels == null) {
            log("Cannot detect the grid structure in " + q.name
                + ". You may need to " + (is_displayr ? "Reset" : "Revert")
                + " the grid first.");
            // Skip this grid only; later grids are still processed.
            return;
        }
        const unique_labels = getUniqueRowOrColumnLabels(split_labels, use_rows);
        const data_file = q.dataFile;
        unique_labels.forEach(function (label) {
            // Collect the variables which share this row (or column) label
            const matching_variables = split_labels.filter(function (obj) {
                return (use_rows ? obj.row_label == label : obj.column_label == label);
            }).map(obj => obj.variable);
            const new_variables = matching_variables.map(v => v.duplicate());
            const new_question = data_file.setQuestion(preventDuplicateQuestionName(data_file, label), (type == "Number - Grid" ? "Number - Multi" : "Pick Any"), new_variables);
            new_question.needsCheckValuesToCount = false;
            new_questions.push(new_question);
        });
        const all_new_vars = getVariablesFromQuestions(new_questions);
        const last_var = q.variables[q.variables.length - 1];
        data_file.moveAfter(all_new_vars, last_var);
        moveQuestionsToHoverButtonIfShown(new_questions);
        all_new_questions = all_new_questions.concat(new_questions);
    });
    // Nothing was created (every grid failed detection), so there is
    // nothing to show or select. Without this guard the first entry of an
    // empty table list (undefined) would be passed to setSelectedRaw.
    if (all_new_questions.length === 0)
        return false;
    if (!is_displayr) {
        const new_tables = [];
        all_new_questions.forEach(function (q) {
            const new_table = project.report.appendTable();
            new_table.primary = q;
            new_tables.push(new_table);
        });
        project.report.setSelectedRaw([new_tables[0]]);
    }
    else {
        project.report.setSelectedRaw(all_new_questions);
    }
}
// Tidy the labels by removing any text which is common
// to the beginning of all labels, or common to the ends.
//
// labels: array of label strings. The supplied array is not modified.
// Returns a new array of labels with the common leading text removed
// first, and then the common trailing text removed.
//
// The common text is removed by its length rather than by building a
// regular expression from it. Labels routinely share text containing
// regular expression metacharacters (for example "Q1 (" or a trailing
// "?"), which would either make the RegExp constructor throw or match
// something other than the literal text.
function removeCommonPrefixAndSuffix(labels) {
    // Remove affix from the front of every string which starts with it
    const stripLeading = function (strings, affix) {
        if (affix.length === 0)
            return strings;
        return strings.map(function (str) {
            return str.startsWith(affix) ? str.slice(affix.length) : str;
        });
    };
    const without_prefix = stripLeading(labels, longestCommonPrefix(labels));
    // Reverse so that the common suffix becomes a common prefix
    const reversed = without_prefix.map(reverseString);
    const without_suffix = stripLeading(reversed, longestCommonPrefix(reversed));
    // Reverse back
    return without_suffix.map(reverseString);
}
// Return str with its characters in reverse order.
// The string is reversed by Unicode code point rather than by UTF-16
// code unit, so characters stored as surrogate pairs (such as emoji)
// stay intact in the reversed string.
function reverseString(str) {
    return Array.from(str).reverse().join("");
}
// Search through all possible substrings of the first label which
// have length 2 or greater, and identify those which are common to all
// of the supplied labels.
// Avoid substrings at the start and end of the first label, since we are
// looking for a substring which splits all of the labels into two pieces.
// In each of the other labels the substring must be present and its first
// occurrence must not be at the very start of the label.
//
// original_labels: array of label strings. It is not modified.
// Returns an array of candidate substrings, each listed once, in the
// order in which they are first found in the first label. Returns an
// empty array when there are no labels, or when any label is shorter
// than 5 characters.
function identifyCommonSubstringCandidatesInLabels(original_labels) {
    if (original_labels.length === 0)
        return [];
    // If any of the labels are too short to be split into
    // a delimiter, suffix, and prefix
    if (original_labels.some(x => x.length < 5))
        return [];
    const first = original_labels[0];
    const others = original_labels.slice(1);
    const candidates = [];
    const seen = new Set(); // Substrings already tested, to avoid repeats
    // start >= 1 leaves at least one character before the substring, and
    // end < first.length leaves at least one character after it.
    for (let start = 1; start <= first.length - 3; start++) {
        for (let end = start + 2; end < first.length; end++) {
            const current_substring = first.substring(start, end);
            if (seen.has(current_substring))
                continue;
            seen.add(current_substring);
            if (others.every(function (str) { return str.indexOf(current_substring) > 0; }))
                candidates.push(current_substring);
        }
    }
    return candidates;
}
// Decide whether a candidate delimiter splits the labels in a way
// that looks like a grid.
// The labels are split by the delimiter, and the pieces in the row
// position (or the column position when use_rows is false) are
// tallied. The split is accepted when every distinct piece occurs
// the same number of times, and that number is more than one.
// With the delimiter " - " these labels are accepted:
// * Column A - Row A
// * Column B - Row A
// * Column A - Row B
// * Column B - Row B
// whereas these are rejected:
// * Column A - Row A
// * Column B - Row A
// * Column A - Row B
// * Column B - Row B
// * Column C - Row A
// * Column D - Row A
// since Column A and Column B each occur twice, while Column C
// and Column D each occur only once.
function delimiterProducesConsistentPrefixes(variables, trimmed_labels, delimiter, use_rows = true) {
    const pieces = splitLabelUsingDelimiter(variables, trimmed_labels, delimiter);
    const tallies = countLabelOccurrences(
        pieces,
        getUniqueRowOrColumnLabels(pieces, use_rows),
        use_rows
    );
    const uniform = allEqual(tallies);
    // A tally of one would create sets containing a single variable
    return uniform && tallies[0] > 1;
}
// Score a candidate delimiter from 0 to 3. One point is earned for
// each of the following:
// - it begins with a whitespace character
// - it ends with a whitespace character
// - it contains one of the listed punctuation characters
// A higher score marks a more plausible delimiter, which keeps the
// algorithm from picking a short run of letters that merely happens
// to appear in every label. For example:
// " - "
// ": "
// " ("
// all score higher than:
// "th", "an", "Co", etc.
function scoreDelimiter(delimiter) {
    const signals = [
        /^\s/,
        /\s$/,
        /[.,\/#!$%\^&\*;:{}=\-_`~()]/
    ];
    return signals.reduce(function (score, pattern) {
        return score + (pattern.test(delimiter) ? 1 : 0);
    }, 0);
}
// Work out the row and column label of each variable in a grid.
// The variable labels are first tidied by removing text common to
// all of them. Every substring common to the tidied labels is then
// tried as a delimiter, keeping only those which split the labels
// consistently for the requested position (rows, or columns when
// use_rows is false). The surviving delimiter with the highest
// score is used to split the labels.
// Returns the array produced by splitLabelUsingDelimiter, or null
// when no suitable delimiter is found.
function splitGridVariableLabels(variables, use_rows = true) {
    const tidied = removeCommonPrefixAndSuffix(variables.map(v => v.label));
    const usable = identifyCommonSubstringCandidatesInLabels(tidied)
        .filter(candidate => delimiterProducesConsistentPrefixes(variables, tidied, candidate, use_rows));
    if (usable.length == 0)
        return null;
    // Highest scoring delimiter first
    usable.sort((left, right) => scoreDelimiter(right) - scoreDelimiter(left));
    return splitLabelUsingDelimiter(variables, tidied, usable[0]);
}
// Build one entry per variable holding the column label and row
// label found by splitting that variable's tidied label with the
// supplied delimiter.
// The text before the first occurrence of the delimiter is the
// column label, and all of the text after it is the row label
// (any further occurrences of the delimiter are kept in the row
// label). The row label is undefined when the delimiter does not
// occur in the label.
// Each entry has the form { column_label, row_label, variable }.
function splitLabelUsingDelimiter(variables, trimmed_labels, delimiter) {
    return variables.map((variable, position) => {
        const [column_label, ...remainder] = trimmed_labels[position].split(delimiter);
        // Re-join when the delimiter occurred more than once
        const row_label = remainder.length > 1 ? remainder.join(delimiter) : remainder[0];
        return { column_label: column_label, row_label: row_label, variable: variable };
    });
}
// From an array of split label entries, collect the labels found
// in the row position (or the column position when use_rows is
// false) and return them with repeats removed by unique().
function getUniqueRowOrColumnLabels(split_labels, use_rows = true) {
    const position = use_rows ? "row_label" : "column_label";
    const labels_at_position = split_labels.map(entry => entry[position]);
    return unique(labels_at_position);
}
// For every label in unique_labels, count how many entries of
// split_labels carry that label in the row position (or in the
// column position when use_rows is not truthy).
// Returns an array of counts in the same order as unique_labels.
function countLabelOccurrences(split_labels, unique_labels, use_rows) {
    const position = use_rows ? "row_label" : "column_label";
    return unique_labels.map(label =>
        split_labels.reduce((tally, entry) => (entry[position] == label ? tally + 1 : tally), 0));
}