QScript Functions for Automatically Combining Categories
Jump to navigation
Jump to search
This page is currently under construction, or it refers to features which are under development and not yet available for use.
This page is under construction. Its contents are only visible to developers!
This page is currently under construction, or it refers to features which are under development and not yet available for use.
This page is under construction. Its contents are only visible to developers!
includeWeb('QScript Functions for Geography');
// Algorithms that the JS GUI offers in its 'Combine by' control.
// When more than one is listed the user can switch between them.
// The value is the source text of an array literal (not an array), because
// it is interpolated directly into the generated GUI code.
const ALLOWED_ALGORITHMS =
    '[' + ['Value', 'Geography', 'Pattern (CHAID)'].map(name => "'" + name + "'").join(', ') + ']';
// JavaScript GUI block for the 'By Value' code path.
//
// This constant holds SOURCE TEXT that is spliced into the generated GUI
// code; nothing in it runs when this file is loaded. The generated code
// expects these names to already be in scope when it is evaluated:
//   form, project, data_box, is_multi           - GUI objects / flags
//   use_labels_default, use_range_default,
//   default_method, default_breaks,
//   default_decimals                            - default values for controls
//
// Escapes are doubled inside this template (e.g. \\' and \\\s) so that the
// generated code receives a single backslash (\' and \s respectively).
//
// All variables introduced by the generated code are declared with
// let/const so that it does not create implicit globals.
const JS_GUI_BY_VALUE =
`
// Sort between categorical and numeric input variables
let user_inputs = is_multi ? data_box.getValues() : [data_box.getValue()];
let categorical_variables = [];
let numeric_variables = [];
if (user_inputs.length > 0)
{
const categorical_types = ['Nominal', 'Nominal - Multi',
'Ordinal', 'Ordinal - Multi'];
let selected_guids = user_inputs.map(selection => selection.guid);
const all_variables = project.dataFiles.map(dat => dat.variables).flat();
const selected_variables = all_variables.filter(variable => selected_guids.includes(variable.guid));
selected_variables.forEach(variable => {
if (categorical_types.includes(variable.question.variableSetStructure))
categorical_variables.push(variable);
else
numeric_variables.push(variable);
});
}
// Method for generating categories
let available_methods = ['Tidy categories',
'Percentiles',
'Equally spaced categories',
'Custom categories'];
let use_labels_box;
let labels_contain_box;
let merge_ranges = false;
let use_labels = false;
if (categorical_variables.length > 0) {
use_labels_box = form.checkBox({name: 'formCategoriesUseLabels',
label: 'Use numbers found in category labels',
default_value: use_labels_default,
prompt: 'Select this option if your category labels include the numbers you want to group, otherwise use the underlying numeric values' });
use_labels = use_labels_box.getValue();
if (use_labels) {
labels_contain_box = form.comboBox({name: 'formUseRanges',
label: 'Labels contain',
alternatives: ['Single values', 'Ranges of values'],
default_value: (use_range_default) ? 'Ranges of values' : 'Single values',
prompt: 'Do the category labels contain single values, or do they contain ranges like \\'18 to 24\\''})
if (labels_contain_box.getValue() == 'Ranges of values') {
available_methods = ['Percentiles',
'Equally spaced categories'];
merge_ranges = true;
}
}
}
let method_selection = form.comboBox({name: 'formMethod',
label: 'Method',
alternatives: available_methods,
default_value: default_method,
prompt: 'Specify the type of categories you wish to form'});
let method = method_selection.getValue();
let method_collapsed = method.replace(/\\\s/g, '');
form.group('NEW CATEGORIES');
if (!merge_ranges) {
// Choose number of categories
if (['Tidy categories', 'Equally spaced categories'].indexOf(method) > -1) {
let num_label = (method == 'Tidy categories' ? 'Target number of categories' : 'Number of categories');
form.numericUpDown({name: 'formNumberCategories',
label: num_label,
default_value: 2,
prompt: 'Specify the number of categories to produce'});
}
// Set start and end points of range for equally spaced categories
if (method == 'Equally spaced categories') {
form.textBox({name: 'formEqualStart',
label: 'Start point',
default_value: '',
prompt: 'Leave blank to start at lowest value',
required: false});
form.textBox({name: 'formEqualEnd',
label: 'End point',
default_value: '',
prompt: 'Leave blank to end at highest value',
required: false});
form.textBox({name:'formIncrement',
label: 'Increment',
default_value: '',
required: false,
prompt: '(Optional) Specify the width of the categories. This will override the \\'Number of categories\\' setting'});
}
// Specify desired percentiles
if (method == 'Percentiles') {
form.textBox({name: 'formPercentages',
label: 'Percentages',
default_value: '10',
prompt: 'Enter the percentiles to create either as a single value, e.g. 10, or as a comma-separated set of percentiles e.g. 70, 80, 90'});
}
// Specify custom intervals
if (method == 'Custom categories') {
form.textBox({name: 'formCustomBreaks',
label: 'Cut points',
required: true,
default_value: default_breaks,
prompt: 'Comma-separated list of category boundary points, starting with the lowest desired value in the first category.'});
form.checkBox({name: 'formAlwaysIncludeEnds',
label: 'Always include highest and lowest values',
default_value: true,
prompt: 'Always include the full range of values in the new categories even if not entered in \\'Cut points\\' above'});
}
// Whether closed ends of intervals are at the start of the interval or the end.
form.comboBox({name: 'formCutPosition',
label: 'Category boundary',
alternatives: ['Start of range', 'End of range'],
default_value: method == 'Percentiles' ? 'End of range' : 'Start of range',
prompt: ''});
} else {
form.numericUpDown({name: 'formNumberCategories',
label: 'Number of categories',
default_value: 2,
prompt: 'Specify the number of categories to produce'});
}
if (!merge_ranges) {
form.group('NEW LABELS')
// Choosing label style
let alternatives = ['Tidy labels', 'Inequality notation', 'Interval notation'];
if (method == 'Percentiles')
alternatives = ['Percentiles'].concat(alternatives);
let default_label_style = alternatives[0];
let label_style_box = form.comboBox({name: 'formLabelStyle' + method_collapsed,
label: 'Label style',
alternatives: alternatives,
default_value: default_label_style,
prompt: 'Select the style to use for the new category labels.'});
// Should the first and last categories use open-ended language or exact values?
if (label_style_box.getValue() == 'Tidy labels') {
form.checkBox({name: 'formOpenEnds',
label: 'Use open-ended labels',
default_value: true,
prompt: 'When ticked, the first label will show \\'less than\\' and the last label will show \\'and more\\''});
}
// Add prefix and suffix to category labels
if (label_style_box.getValue() != 'Percentiles') {
form.textBox({name: 'formNumberPrefix',
label: 'Number prefix',
default_value: '',
required: false,
prompt: 'Text to place before numbers in labels'});
form.textBox({name: 'formNumberSuffix',
label: 'Number suffix',
default_value: '',
required: false,
prompt: 'Text to place after numbers in labels'});
// Choose number of decimals in formatting of numbers
form.numericUpDown({name: 'formLabelDecimals', label: 'Decimals in label', default_value: default_decimals, increment: 1,
prompt: 'Choose the number of decimal places to show in the data label'});
}
}
// User inputs to specify number format if
// using category labels
if (categorical_variables.length > 0 && use_labels) {
form.group('INPUT DATA LABELS');
if (use_labels_box.getValue()) {
form.textBox({name: 'formDecimalsMark',
label: 'Decimals symbol',
required: true,
default_value: '.',
prompt: 'Symbol which denotes the beginning of the decimal places in a number. In many conventions this is a period, but in other conventions it may be a comma.'});
form.textBox({name: 'formGroupingMark',
label: 'Thousands symbol',
required: false,
default_value: ',',
prompt: 'Symbol which separates the digits when numbers are greater than 1000. In many conventions this is a comma, but in other conventions it may be a period or space.'});
if (merge_ranges && method.indexOf('Perc') == -1) {
form.textBox({name: 'formLowerBound',
label: 'Start of range',
required: false,
default_value: '',
prompt: 'Enter the lowest possible value in the range. Leave blank if this information is already contained in the data labels.'})
form.textBox({name: 'formUpperBound',
label: 'End of range',
required: false,
default_value: '',
prompt: 'Enter the highest possible value in the range. Leave blank if this information is already contained in the data labels.'})
}
}
}
`;
// JavaScript GUI code for the 'By Pattern (CHAID)' code path.
//
// This constant holds SOURCE TEXT for generated GUI code; it is not run when
// this file is loaded. The generated code expects `form` to be in scope and
// creates, in order:
//   - formOutcomeVariable / formWeightVariable drop boxes
//   - formAllowableCombineOption (which pairs of categories may be merged)
//   - formUnorderedCategories, only when an 'Adjacent categories...' option
//     is chosen
//   - formExhaustive, formMinimumCategorySize, formAlphaMerge,
//     formAlphaSplitNode and formBonferroniAdjust
// The controls for re-splitting combined categories (formAllowBinarySplits,
// formAlphaSplitMerge) are commented out inside the generated code and are
// therefore never created.
// Quotes inside prompts are written as \\' here so that the generated code
// receives \'.
const JS_GUI_CHAID =
`
form.dropBox({label: 'Based on',
types: ['Variable: Categorical, Ordered Categorical'], required: true,
name: 'formOutcomeVariable',
prompt: 'The nominal variable to determine which categories to combine'});
form.dropBox({label: 'Weight',
types: ['Variable: Numeric:weight'], required: false,
name: 'formWeightVariable',
prompt: 'Survey weights to use in the Rao-Scott Test of Independence'});
form.group('CHAID Algorithm settings')
let combine_option = form.comboBox({
name:'formAllowableCombineOption',
label: 'Combine',
alternatives: ['Any categories',
'Adjacent categories',
'Adjacent categories unless missing value code',
'Using variable set structure'],
prompt: 'Specify which pairs of categories are considered for each merge'}).getValue();
if (combine_option.startsWith('Adjacent categories'))
form.textBox({name: 'formUnorderedCategories',
label: 'Unordered categories',
required : false,
prompt: 'Specify the categories that are considered unordered. E.g. "Don\\'t know" on an ordinal scale'});
let exhaustive = form.comboBox({label: 'Use Exhaustive CHAID', name: 'formExhaustive',
alternatives: ['Yes', 'No', 'Usually'],
default_value: 'Usually',
prompt: 'Use the exhaustive CHAID algorithm or not. Choosing \\'Usually\\' will use the exhaustive algorithm if there are not too many categories'});
form.numericUpDown({label: 'Minimum category size', name: 'formMinimumCategorySize', default_value: 50,
minimum: 1, increment: 1, maximum: Number.MAX_SAFE_INTEGER,
prompt: 'Required size of each category. If smaller, it is merged with its most similar category'});
// let allow_binary_splits = form.checkBox({label: 'Allow combined categories to be resplit',
// name: 'formAllowBinarySplits', default_value: false,
// prompt: 'Allow combined categories with 3 or more levels to be resplit'});
// if (!exhaustive.getValue()) {
form.numericUpDown({label: 'Alpha level to combine categories', name: 'formAlphaMerge', default_value: 0.05,
minimum: 0, maximum: 1, increment: 0.005,
prompt: 'Significance level to combine categories in the standard CHAID (unused for exhaustive CHAID)'});
// if (allow_binary_splits.getValue()) {
// form.numericUpDown({label: 'Alpha level to split compound categories',
// name: 'formAlphaSplitMerge', default_value: 0.049,
// prompt: 'Significance level to allow compound categories to resplit.'});
// }
// }
form.numericUpDown({label: 'Alpha level to validate final combined categories', name: 'formAlphaSplitNode',
default_value: 0.05, minimum: 0, maximum: 1, increment: 0.005,
prompt: 'Significance level for the final solution'});
form.checkBox({label: 'Multiple Comparison adjustment', name: 'formBonferroniAdjust', default_value: true,
prompt: 'Use the Bonferroni multiple comparison adjustment for the final merged variable'});
`;
// JavaScript GUI code block for the 'By Geography' code path.
//
// This constant holds SOURCE TEXT for generated GUI code; it is not run when
// this file is loaded. The generated code expects `form` and
// `check_spelling_default` to be in scope.
//
// Each entry of AVAILABLE_TYPES lists the geographic types of a region; the
// last entry of a region is never offered as an input type and the output
// types are taken from the entries that follow the chosen input type.
// Control names for the input/output type combo boxes are suffixed with the
// region (and input type) with spaces and punctuation removed, so each
// region/input combination gets its own control.
//
// `postcode_input` is declared here from the chosen input type (ZIP code,
// Postal code, Postcode and their combined forms); it was previously read
// without ever being declared, which made the generated code throw a
// ReferenceError. The spell-check option is only offered for non-postcode
// inputs.
const JS_GUI_BY_GEOGRAPHY =
`
const AVAILABLE_TYPES = [
{region: 'USA', types: ['Place (city, town, etc.)', 'ZIP code', 'County', 'State', 'Region']},
{region: 'USA and Canada', types: ['Place (city, town, etc.)', 'ZIP code/Postal code', 'State/Province', 'Region', 'Country']},
{region: 'Canada', types: ['Place (city, town, etc.)', 'Postal code', 'Province', 'Region']},
{region: 'Europe (including UK)', types: ['Place (city, town, etc.)', 'Postcode',
'Community/District', 'Province/County', 'State/Region', 'Country code']},
{region: 'Europe', types: ['Place (city, town, etc.)', 'Postcode', 'Community', 'Province', 'State', 'Country code']},
{region: 'UK', types: ['Place (city, town, etc.)', 'Postcode', 'County', 'District', 'Region', 'Country']},
{region: 'Australia and New Zealand', types: ['Place (city, town, etc.)', 'Postcode',
'Local government area (LGA)', 'State/Region']},
{region: 'Australia', types: ['Place (city, town, etc.)', 'Postcode', 'Local government area (LGA)', 'State']},
{region: 'New Zealand', types: ['Place (city, town, etc.)', 'Postcode', 'Local government area (LGA)', 'Region']}];
let region = form.comboBox({name: 'formRegion', label: 'World region', multi: false,
alternatives: ['USA and Canada', 'USA', 'Canada', 'Europe (including UK)',
'Europe', 'UK', 'Australia and New Zealand', 'Australia', 'New Zealand'],
default_value: 'USA',
prompt: 'Region of the world that the input data comes from'}).getValue();
let types = AVAILABLE_TYPES.find(obj => obj.region === region).types;
let input_types = types.slice(0,-1); // Can't map largest admin region to anything
let input = form.comboBox({name: 'formInputType' + region.replace(/[ ()]/g, ''),
label: 'Input data type',
alternatives: input_types, default_value: input_types[0],
prompt: 'Select type of input data to be recoded; e.g. postcodes or states'}).getValue();
// For output type, filter out types smaller than selected input_type; e.g. can't map state to county
// if postcode input, allow all types other than postcode itself
let place_input = input.startsWith('Place');
// Postal-code style inputs: 'ZIP code', 'ZIP code/Postal code', 'Postal code', 'Postcode'
let postcode_input = /^(ZIP|Post)/.test(input);
let output_types = place_input ? types.slice(2) : types.slice(types.indexOf(input)+1);
form.comboBox({name: 'formOutputType' + region.replace(/[ ()]/g, '') + input.replace(/[/(,.) ]/g, ''),
label: 'Output geographic type',
alternatives: output_types, default_value: output_types[0],
prompt: 'Select geographic type to convert input to; e.g., postcodes or states'});
if (!postcode_input)
form.checkBox({name: 'formCheckSpelling', default_value: check_spelling_default,
label: 'Check spelling',
prompt: 'Use Levenshtein distance to correct spelling errors in input text'});
if (/^Place/.test(input))
form.dropBox({name: 'formTextExtra', prompt: 'Provide an additional geographic variable (e.g. state or province) to further disambiguate place names in input text',
types: ['Variable:!filter'], label: 'Supplementary variable', required: false, multi: false});
`;
// JavaScript GUI code block. Combines the code blocks for all the algorithms.
// Additional information about default values for controls to be added later
// by the QScript.
//
// The template is evaluated once, when this file is loaded: at that point
// ALLOWED_ALGORITHMS, JS_GUI_BY_VALUE, JS_GUI_CHAID and JS_GUI_BY_GEOGRAPHY
// are interpolated as text. The resulting GUI code:
//   - sets the form heading and creates the 'formVariables' drop box;
//   - shows the 'formAlgorithm' combo box only when more than one algorithm
//     is allowed and is_multi is false, otherwise uses default_algorithm;
//   - runs exactly one of the three algorithm-specific blocks, chosen by
//     substring match on the algorithm name.
// is_multi, box_height and default_algorithm are not declared here; they are
// presumably supplied by the code the QScript prepends - confirm against the
// caller.
const JS_GUI_COMMON =
`
form.setHeading('Automatically Combine Categories');
let data_box = form.dropBox({name: 'formVariables',
label: 'Variable' + (is_multi ? 's' : ''),
types: ['V:numeric, categorical, ordered categorical, money, text'],
multi: is_multi,
prompt: 'Select one or more Variables',
height: box_height});
let allowed_algorithms = ${ALLOWED_ALGORITHMS};
let algorithm = default_algorithm;
if (allowed_algorithms.length > 1 && !is_multi) {
let algorithm_box = form.comboBox({name:'formAlgorithm',
label: 'Combine by',
alternatives: allowed_algorithms,
default_value: default_algorithm});
algorithm = algorithm_box.getValue();
}
if (algorithm.indexOf('Value') > -1) {
${JS_GUI_BY_VALUE}
} else if (algorithm.indexOf('Pattern (CHAID)') > - 1) {
${JS_GUI_CHAID}
} else if (algorithm.indexOf('Geo') > - 1) {
${JS_GUI_BY_GEOGRAPHY}
}
`;
// R Code block for the 'By Geography' code path.
//
// This constant holds R SOURCE TEXT. The R code:
//   - derives first.region from formRegion by stripping a trailing
//     " and ..." or " (including ..." part;
//   - looks up the input/output type controls by name prefix (getType),
//     because the GUI suffixes those control names with the region, and
//     strips any "/synonym" part of the value;
//   - uses a maximum Levenshtein distance of 2 when formCheckSpelling is
//     set and 0 otherwise (formCheckSpelling defaults to FALSE when the
//     control does not exist);
//   - calls RecodeGeography from flipGeoData and stores the result, as a
//     factor, in new.factor.
// \" inside the template is an escaped double quote and yields a plain ".
const R_CODE_BY_GEOGRAPHY =
`
first.region <- sub(" (and |[(]including).*$", "", formRegion)
getType <- function(ctrl.prefix)
sub("/[A-z ]*$", "", # strip synonym, e.g. "State/Province"
get0(ls(pattern = paste0("^", ctrl.prefix), envir = .GlobalEnv),
envir = .GlobalEnv))
library(flipGeoData)
max.dist <- ifelse(get0(\"formCheckSpelling\", ifnotfound = FALSE), 2, 0)
new.factor <- RecodeGeography(formVariables,
input.type = getType("formInputType"),
output.type = getType("formOutputType"),
region = first.region,
check.neighboring.region = first.region != formRegion,
max.levenshtein.dist = max.dist,
text.extra = get0("formTextExtra"))
new.factor <- factor(new.factor)
`;
// R Code block for the 'By Value' code path.
//
// This constant holds R SOURCE TEXT. The R code:
//   - when formCategoriesUseLabels is set, extracts the numbers found in
//     the labels of the input data and stops with an explanatory message
//     if no label contains a number, if any label contains three or more
//     numbers, or if the mix of single values and ranges does not agree
//     with the 'Labels contain' (formUseRanges) setting;
//   - calls NiceNumericCuts when cutting on numeric values (or on single
//     values found in labels), and MergeRangeCategories when the labels
//     describe ranges;
//   - copies the names of formVariables onto the result when n.inputs > 1.
// Controls that may not exist for the chosen options are read with get0()
// and an explicit fallback. n.inputs is not defined in this block; it is
// expected to be defined by code placed ahead of it.
// \\' inside the template yields \' in the R code.
const R_CODE_BY_VALUE =
`
library(flipTransformations)
right <- get0("formCutPosition", ifnotfound = "") == "End of range"
input.data = as.data.frame(formVariables)
classes = lapply(input.data, FUN = class)
numerics = vapply(input.data, FUN = is.numeric, FUN.VALUE = logical(1))
use.numeric.cuts = TRUE
if(get0("formCategoriesUseLabels", ifnotfound = FALSE)) {
# User wants to try to use numbers from the labels
uniques = unique(unlist(input.data))
uniques = uniques[!is.na(uniques)]
label.chunks = lapply(X = uniques,
FUN = flipTransformations:::extractRangeInformationFromLabel,
grouping.mark = get0("formGroupingMark", ifnotfound = ","),
decimals.mark = get0("formDecimalsMark", ifnotfound = "."))
numbers.from.labels = lapply(label.chunks, FUN = function(x) return(x$numbers))
number.of.numbers = vapply(numbers.from.labels,
FUN = length,
FUN.VALUE = numeric(1))
if (!any(number.of.numbers > 0)) {
stop("The labels of the data that you have selected do not contain any ",
"numeric values and cannot be combined. If you wish to create categories ",
"using the underlying data values for these categories you can untick ",
"the \\'Use numbers found in category labels\\' option.")
}
if (any(number.of.numbers > 2)) {
stop("Some of the data labels contain three or more numeric ",
"values and such categories cannot be merged. Please ",
"select data whose labels contain single values, ranges ",
"of values, or numeric data. Alternatively, try merging ",
"this data with CHAID.")
}
if (formUseRanges == "Ranges of values" && length(which(number.of.numbers == 2)) > 0 && length(which(number.of.numbers == 1)) > 2) {
stop("The structure of the data labels is ambiguous and it ",
"cannot be merged with this tool. If you wish to merge ",
"data labels containing ranges of values then all but two ",
"of the labels may contain a single value. Alternatively, ",
"try merging this data with CHAID.")
}
if (formUseRanges != "Ranges of values" && length(which(number.of.numbers > 1)) > 0) {
stop("This data cannot be merged because some labels contain more than one numeric value. Try setting 'Labels contain' to 'Ranges of values'.");
}
use.numeric.cuts = formUseRanges != "Ranges of values"
}
if (use.numeric.cuts) {
method = switch(formMethod,
"Tidy categories" = "tidy.intervals",
"Percentiles" = "percentiles",
"Equally spaced categories" = "equal.width",
"Custom categories" = "custom")
label.style = switch(get0(paste0("formLabelStyle", gsub(" ", "", formMethod))),
"Tidy labels" = "tidy.labels",
"Percentiles" = "percentiles",
"Inequality notation" = "inequality.notation",
"Interval notation" = "interval.notation")
new.factor = NiceNumericCuts(input.data,
method = method,
num.categories = get0("formNumberCategories", ifnotfound = NULL),
right = right,
label.decimals = get0("formLabelDecimals", ifnotfound = NULL),
open.ends = get0("formOpenEnds", ifnotfound = FALSE),
label.style = label.style,
number.prefix = get0("formNumberPrefix", ifnotfound = ""),
number.suffix = get0("formNumberSuffix", ifnotfound = ""),
open.bottom.string = "Less than ",
closed.bottom.string = " and below",
open.top.string = "More than ",
closed.top.string = " and over",
equal.intervals.start = get0("formEqualStart", ifnotfound = 0),
equal.intervals.end = get0("formEqualEnd", ifnotfound = 100),
equal.intervals.increment = get0("formIncrement", ifnotfound = NULL),
custom.breaks = get0("formCustomBreaks", ifnotfound = NULL),
custom.always.includes.endpoints = get0("formAlwaysIncludeEnds", ifnotfound = TRUE),
percents = get0("formPercentages", ifnotfound = NULL),
factors.use.labels = get0("formCategoriesUseLabels", ifnotfound = FALSE),
grouping.mark = get0("formGroupingMark", ifnotfound = ","),
decimals.mark = get0("formDecimalsMark", ifnotfound = "."))
} else {
method = switch(formMethod,
"Percentiles" = "even.proportions",
"Equally spaced categories" = "even.ranges")
new.factor = MergeRangeCategories(input.data,
method = method,
num.categories = formNumberCategories,
upper.bound = get0("formUpperBound", ifnotfound = ""),
lower.bound = get0("formLowerBound", ifnotfound = ""),
grouping.mark = get0("formGroupingMark", ifnotfound = ","),
decimals.mark = get0("formDecimalsMark", ifnotfound = "."))
}
if (n.inputs > 1) {
names(new.factor) = names(formVariables)
}
`;
// R Code block for the 'CHAID' code path.
//
// This constant holds R SOURCE TEXT. The R code:
//   - decides whether to run exhaustive CHAID: with 'Usually' selected it
//     is used only when the number of levels of formVariables is below
//     100 (weighted) or 200 (unweighted), and a warning helper is called
//     when it is not used; otherwise it follows the Yes/No choice;
//   - treats the variable as ordered for the two 'Adjacent categories'
//     options, and defers to is.ordered() for 'Using variable set
//     structure';
//   - for the 'Adjacent' options, splits formUnorderedCategories on ';'
//     when the text contains one and on ',' otherwise, and adds the
//     missing value codes when that option is selected;
//   - builds a CHAIDcontrol object (binary splits are always disabled)
//     and stores the result of CombineCategoriesCHAID in new.factor.
// formAlphaSplitMerge is read with get0() and falls back to NULL when the
// control does not exist.
const R_CODE_BY_CHAID =
`
library(flipCHAID)
is.weighted <- !is.null(formWeightVariable)
max.n.levels <- if (is.weighted) 100L else 200L
n.levels <- nlevels(formVariables)
usually.selected <- formExhaustive == "Usually"
exhaustive <- if (usually.selected) n.levels < max.n.levels else formExhaustive == "Yes"
ordered <- switch(formAllowableCombineOption,
"Any categories" = FALSE,
"Adjacent categories" = TRUE,
"Adjacent categories unless missing value code" = TRUE,
"Using variable set structure"= is.ordered(formVariables))
if (usually.selected && !exhaustive)
WarnExhaustiveAlgorithmNotUsed(formVariables, is.weighted, max.n.levels)
ordinal.algorithm <- startsWith(formAllowableCombineOption, "Adjacent")
if (ordinal.algorithm) {
unordered.categories <- if (ordinal.algorithm) get0("formUnorderedCategories", ifnotfound = NULL)
sep <- if (!is.null(unordered.categories)) ifelse(grepl(";", unordered.categories), ";", ",")
unordered.categories <- ConvertCommaSeparatedStringToVector(unordered.categories, split = sep)
} else
unordered.categories <- NULL
if (ordinal.algorithm && !is.null(unordered.categories))
levels(formVariables) <- TrimLevelsForMatching(formVariables)
inspect.missing.value.code <- formAllowableCombineOption == "Adjacent categories unless missing value code"
if (inspect.missing.value.code)
unordered.categories <- unique(c(unordered.categories, MissingValueCodes(formVariables)))
control <- CHAIDcontrol(exhaustive = exhaustive,
alpha.merge = get0("formAlphaMerge", ifnotfound = NULL),
alpha.split.merge = get0("formAlphaSplitMerge", ifnotfound = NULL),
alpha.split.node = formAlphaSplitNode,
bonferroni.adjust = formBonferroniAdjust,
binary.splits.allowed = FALSE,
minimum.category.size = formMinimumCategorySize,
ordered = ordered)
new.factor <- CombineCategoriesCHAID(formVariables, formOutcomeVariable, weights = formWeightVariable,
control = control, output = "Variable",
unordered.categories = unordered.categories)
`;
// Main block of R code used by the new variable. Combines
// code blocks for the algorithms.
// Additional lines will be added to the top of this by the QScript.
//
// The template is evaluated once, when this file is loaded, interpolating
// R_CODE_BY_VALUE, R_CODE_BY_CHAID and R_CODE_BY_GEOGRAPHY as text.
// The R code first stops with an error when the number of input variables
// differs from n.inputs (n.inputs is not defined here - it is expected to
// come from the lines the QScript adds), then dispatches on formAlgorithm
// (falling back to "Value" when that control does not exist) and finally
// evaluates to new.factor.
const R_CODE_COMMON =
`
current.inputs <- if (is.list(formVariables)) length(formVariables) else 1L
if (current.inputs != n.inputs) {
stop("The number of input variables has changed and this variable set cannot change its size. ",
"Please run this feature again on new data.")
}
formAlgorithm <- get0("formAlgorithm", ifnotfound = "Value")
if (grepl("Value", formAlgorithm, fixed = TRUE)) {
${R_CODE_BY_VALUE}
} else if (grepl("CHAID", formAlgorithm, fixed = TRUE)) {
${R_CODE_BY_CHAID}
} else if (grepl("Geography", formAlgorithm, fixed = TRUE)) {
${R_CODE_BY_GEOGRAPHY}
}
new.factor
`;
// Returns the main block of R code for the new variable.
// @param extra_functionality When truthy, the CHAID branch of the returned
//   code is built from new_R_CODE_BY_CHAID instead of the standard CHAID
//   block; when falsy, R_CODE_COMMON is returned unchanged.
// @return A string of R code.
// NOTE(review): new_R_CODE_BY_CHAID is not defined in this part of the
// file - presumably it is provided elsewhere before this is called; confirm.
createR_CODE_COMMON = function(extra_functionality) {
    if (extra_functionality) {
        return `
current.inputs <- if (is.list(formVariables)) length(formVariables) else 1L
if (current.inputs != n.inputs) {
stop("The number of input variables has changed and this variable set cannot change its size. ",
"Please run this feature again on new data.")
}
formAlgorithm <- get0("formAlgorithm", ifnotfound = "Value")
if (grepl("Value", formAlgorithm, fixed = TRUE)) {
${R_CODE_BY_VALUE}
} else if (grepl("CHAID", formAlgorithm, fixed = TRUE)) {
${new_R_CODE_BY_CHAID}
} else if (grepl("Geography", formAlgorithm, fixed = TRUE)) {
${R_CODE_BY_GEOGRAPHY}
}
new.factor
`;
    }
    return R_CODE_COMMON;
}
// Builds the default-value declarations that go at the top of the
// JavaScript GUI code when the user has selected a 'By Geography' option.
// @param selected_variables The variables chosen by the user. Not used at
//   present: the spell-check default is always false.
// @return A string holding the declaration of check_spelling_default,
//   preceded and followed by a line break.
getDefaultsForGeographicMergingGUI = function(selected_variables) {
    const declaration_lines = ['', 'let check_spelling_default = false;', ''];
    return declaration_lines.join('\n');
}
//${selected_variables[0].question.dataFile.totalN < 10000};
// Identify default values to place at the top of the JavaScript GUI Code
// when user has selected a 'By Value' option.
//
// @param selected_variables Array of the variables chosen by the user. Each
//   variable is read for .uniqueValues and .question (.questionType,
//   .dataReduction.rowLabels / .columnLabels, .transposed).
// @param method Name of the method to preselect in the GUI. null or
//   undefined means 'Percentiles'.
// @return A string of JavaScript declarations (default_breaks,
//   default_decimals, use_labels_default, default_method,
//   use_range_default), or null - after logging an explanation - when the
//   category labels mix single values and multiple values.
//
// Notes:
// - Category labels are only collected from 'Pick One' questions.
// - Methods that cannot be used with range labels are replaced by
//   'Percentiles' (with a logged message). Both 'Custom' and the GUI's own
//   name 'Custom categories' are recognised.
// - Array.prototype is not modified.
getDefaultsForNumericMergingGUI = function(selected_variables, method) {
    // Methods that are not offered when labels contain ranges of values.
    const RANGE_INCOMPATIBLE_METHODS = ['Tidy categories', 'Custom', 'Custom categories'];
    // Cut points suggested when the data has no usable numeric values.
    const FALLBACK_BREAKS = '0, 50, 100';

    // Digit runs found in a label once ',' and '.' have been removed, so
    // that '1,000' or '1.5' count as a single number.
    function extractNumbersFromLabel(label) {
        const stripped = label.replace(/\,/g, '').replace(/\./g, '');
        const found = stripped.match(/\d+/g);
        return found === null ? [] : found;
    }

    function countNumbersPerLabel(labels) {
        return labels.map(label => extractNumbersFromLabel(label).length);
    }

    function labelsAreAllSingleValues(labels) {
        return countNumbersPerLabel(labels).every(count => count == 1);
    }

    // Ranges: at least two labels with a pair of numbers, at most two
    // labels with a single number, and none with three or more.
    function labelsReferToRanges(labels) {
        const counts = countNumbersPerLabel(labels);
        const pairs = counts.filter(count => count == 2).length;
        const singles = counts.filter(count => count == 1).length;
        const too_many = counts.filter(count => count > 2).length;
        return (pairs > 1 && singles < 3 && too_many == 0);
    }

    // Number.isInteger is used rather than a bitwise test, which would
    // reject integers that do not fit in 32 bits.
    function isInt(value) {
        return !isNaN(value) && Number.isInteger(parseFloat(value));
    }

    function isPickOne(question) {
        return question.questionType.indexOf('Pick One') > -1;
    }

    let chosen_method = (method === null || method === undefined) ? 'Percentiles' : method;

    const categorical_variables = selected_variables.filter(v => isPickOne(v.question));

    // Work out defaults
    let raw_values = [];
    let category_labels = [];
    selected_variables.forEach(function (v) {
        raw_values = raw_values.concat(v.uniqueValues);
        const q = v.question;
        if (isPickOne(q)) {
            const data_reduction = q.dataReduction;
            let labels = data_reduction.rowLabels;
            if (q.questionType == 'Pick One - Multi' && !q.transposed) {
                labels = data_reduction.columnLabels;
            }
            category_labels = category_labels.concat(labels);
        }
    });
    category_labels = uniqueElementsInArray(category_labels);
    const numeric_labels = category_labels.filter(label => extractNumbersFromLabel(label).length > 0);

    const use_labels_default = !(categorical_variables.length > 0 && numeric_labels.length == 0);

    let is_range = false;
    if (category_labels.length > 2) {
        is_range = labelsReferToRanges(category_labels);
    }

    if (use_labels_default && !is_range && !labelsAreAllSingleValues(category_labels)) {
        log("The values in this data cannot be automatically combined because "
            + "the labels contain a mix of single and multiple values. To use "
            + "this feature you should reset the categories of this data, or "
            + "edit the labels so that each only contains a single value.");
        return null;
    }

    if (is_range && RANGE_INCOMPATIBLE_METHODS.indexOf(chosen_method) > -1) {
        log(`The '${chosen_method}' option is not applicable for categories that contain ranges of values. The 'Percentiles' method has been used instead. Change the options on the right side of the screen to choose how the categories are being combined.`);
        chosen_method = 'Percentiles';
    }

    // The coercing global isNaN is intentional: it keeps numeric strings
    // and drops anything that cannot be read as a number.
    const numeric_values = raw_values.filter(x => !isNaN(x));

    let interval_default_string;
    if (numeric_values.length === 0) {
        interval_default_string = '\'' + FALLBACK_BREAKS + '\'';
    } else {
        const min_val = numeric_values.reduce((lowest, x) => Math.min(lowest, x), Infinity);
        const max_val = numeric_values.reduce((highest, x) => Math.max(highest, x), -Infinity);
        const start = Math.floor(min_val);
        const end = Math.ceil(max_val);
        const interval = Math.floor((end - start) / 4);
        const interval_defaults = [start, start + interval, start + 2 * interval, start + 3 * interval, end];
        // A narrow range gives an interval of 0 and therefore repeated cut
        // points; the list is non-decreasing, so dropping neighbours that
        // repeat leaves each cut point once.
        const distinct_defaults = interval_defaults.filter(
            (cut, i) => i === 0 || cut !== interval_defaults[i - 1]);
        interval_default_string = '\'' + distinct_defaults.join(',') + '\'';
    }

    const all_integers = numeric_values.every(isInt);

    const defaults_string =
`
let default_breaks = ${ interval_default_string };
let default_decimals = ${(all_integers ? 0 : 1)};
let use_labels_default = ${use_labels_default};
let default_method = '${chosen_method}';
let use_range_default = ${is_range};
`
    return defaults_string;
}
// Fallback declarations added to the JavaScript GUI code for the options
// the user has NOT selected, so that every default the GUI code reads is
// still declared.

// Used when the chosen option is not 'Geography'.
const BASE_GEOGRAPHY_GUI_DEFAULTS = 'let check_spelling_default = false;';

// Used when the chosen option is not 'Value'. One declaration per line,
// with a leading and a trailing line break.
const BASE_VALUE_GUI_DEFAULTS = [
    '',
    "let default_breaks = '0, 50, 100';",
    'let default_decimals = 1;',
    'let use_labels_default = true;',
    "let default_method = 'Percentiles';",
    'let use_range_default = false;',
    '',
].join('\n');
// Assembles the text that declares the default values for the JavaScript
// GUI controls, based on the option run by the user and the input data.
// @param selected_variables The variables chosen by the user.
// @param combine_type The option being run. 'Value' and 'Geography' get
//   defaults worked out from the data; any other value uses the base
//   defaults for both.
// @param options Object whose .method is passed on when combine_type is
//   'Value'.
// @return The value defaults, geography defaults and (currently empty)
//   CHAID defaults, each on its own line; or null when the 'Value' defaults
//   could not be determined.
getJSGuiDefaults = function(selected_variables, combine_type, options) {
    // No CHAID-specific defaults exist yet; the slot is kept in the output.
    const chaid_defaults = '';

    let value_defaults;
    if (combine_type == 'Value') {
        value_defaults = getDefaultsForNumericMergingGUI(selected_variables, options.method);
        if (value_defaults === null) {
            return null;
        }
    } else {
        value_defaults = BASE_VALUE_GUI_DEFAULTS;
    }

    const geography_defaults = combine_type == 'Geography'
        ? getDefaultsForGeographicMergingGUI(selected_variables)
        : BASE_GEOGRAPHY_GUI_DEFAULTS;

    return '\n' + value_defaults + '\n' + geography_defaults + '\n' + chaid_defaults + '\n';
}
// Returns the index of any non-sortable category
// @param x An array of strings that contain categories
// @return An array with the position in x of every category identified as
//         non-sortable (by isNonSortable), in ascending order, or null when
//         there are none.
// Positions are recorded while scanning, so a label that occurs more than
// once is reported at each of its positions rather than only at its first.
findNonSortableCategoriesAsIndex = function(x) {
    const non_sortable_indices = [];
    x.forEach(function (category, index) {
        // Same arguments that Array.prototype.filter would hand to the predicate.
        if (isNonSortable(category, index, x)) {
            non_sortable_indices.push(index);
        }
    });
    if (non_sortable_indices.length === 0)
        return null;
    return non_sortable_indices;
}
// Main function called by Scripts in the Automatically Combine Categories menu.
// - combine_type: "Geography", "Value", "CHAID"
// - options: JS object
// - "Value" will contain: method: which corresponds to the method of numeric grouping
// - "Geography" will contain:
// - region (part of the world)
// - output_type (what geographic designation are we combining to? e.g. "state")
createAutomaticallyCombinedCategoryVariables = function (combine_type, options, test_mode = false) {
includeWeb('QScript Selection Functions');
includeWeb('QScript Utility Functions');
includeWeb('JavaScript Array Functions');
const q_allowed_version = 20.14;
const is_displayr = inDisplayr();
if (!is_displayr && Q.fileFormatVersion() < q_allowed_version) {
log('This feature requires a newer version of Q. Please contact support@q-researchsoftware.com');
return false;
}
let by_value = combine_type === 'Value';
let by_geography = combine_type === 'Geography';
let by_pattern = combine_type === 'Pattern (CHAID)';
//////////////////////////////////
// 1 Validate user selection //
//////////////////////////////////
const user_selections = getAllUserSelections();
let selected_variables = user_selections.selected_variables;
let n_selected_variables = selected_variables.length;
if (selected_variables.some(v => v.isHidden)) {
log('Some of the selected variables are hidden. Unhide these variables and then run this option again.');
return;
}
if (selected_variables.some(v => v.question.questionType == 'Date')) {
log('Some of the selected variables are Date/Time, and automatically combining categories is not supported for this type of data.');
return;
}
if (selected_variables.some(v => (v.question.questionType.indexOf('Pick Any') > -1))) {
log('Some of the selected variables are binary variables, and automatically combining categories is not supported for this type of data.');
return;
}
let selected_data_files = user_selections.selected_data_sets;
if (selected_data_files.length > 1) {
log('The selected variables come from more than one Data Set. Select variables from the same dataset and and run this feature again.');
return false;
}
function validVariable(v) {
return !v.isHidden && !v.question.isBanner && v.question.isValid;
}
let data_file = selected_data_files[0];
if (n_selected_variables === 0) {
if (is_displayr) {
log('No variables are selected. Select one or more variables under Data Sets and run this feature again.');
return false;
} else {
data_file = requestOneDataFileFromProject();
let candidate_vars = data_file.variables;
candidate_vars = candidate_vars.filter(validVariable);
let prompt;
if (by_value) {
prompt = 'Select one or more variables to use for combining categories:';
selected_variables = selectManyVariablesByQuestionNameAndLabel(prompt, candidate_vars, false).variables;
} else {
prompt = 'Select a variable to use for combining categories:';
if (by_pattern) {
candidate_vars = candidate_vars.filter(v => v.variableType.endsWith('Categorical'));
if (candidate_vars.length === 0) {
log('There are no categorical variables in this project. ');
return false;
}
}
selected_variables = [selectOneVariableByNameAndLabel(prompt, candidate_vars, false)];
}
n_selected_variables = selected_variables.length;
}
}
let pattern_variable;
if (by_geography) {
if (n_selected_variables > 1) {
log('Automatic combining of geographic data currently only supports single variables, ' +
'like ZIP Code, Post Code, State, etc. Please select a single variable and run ' +
'this feature again.');
return false;
}
}
if (by_pattern) {
if (n_selected_variables > 2) {
log('Automatic combining of categories using CHAID is only supported if one or two categorical variables ' +
'are selected. Please select one or two categorical variables and run this feature again.');
return false;
}
// This code will be updated later to handle numeric pattern variables
selected_variables = selected_variables.filter(v => v.variableType.endsWith('Categorical'));
if (n_selected_variables !== selected_variables.length) {
log('Automatic combining of categories using CHAID requires categorical variables to be selected. ' +
'Not all selected variables are categorical. Select one or two categorical variables and ' +
'run this feature again');
return false;
}
// If only a single variable remains, it must be categorical and a pattern variable needs to be selected
if (selected_variables.length === 1) {
let data_file = selected_variables[0].question.dataFile;
let possible_outcome_variables = data_file.variables.filter(v => validVariable(v) &&
v.variableType.endsWith('Categorical') &&
v.guid !== selected_variables[0].guid);
if (possible_outcome_variables.length === 0) {
log('There are no other categorical variables in the dataset to compare patterns and use ' +
'to combine categories. Please add another categorical variable to this dataset and ' +
'run this feature again.');
return false;
} else {
prompt = 'Which variable should be used to determine how to combine the categories?';
pattern_variable = selectOneVariableByLabel(prompt, possible_outcome_variables);
}
} else
pattern_variable = selected_variables.pop();
}
// 2 Identify defaults in data (R and JS GUI)
let n_variables_input = combine_type === 'Pattern (CHAID)' ? 1 : selected_variables.length;
let multi_select_dropbox = combine_type !== 'Pattern (CHAID)' && selected_variables.length > 1;
let gui_header_string = `
let default_algorithm = '${combine_type}';
let is_multi = ${multi_select_dropbox};
let box_height = ${multi_select_dropbox ? 4 : 1};
`;
let r_header_string = `n.inputs <- ${n_variables_input}`;
let js_gui_defaults = getJSGuiDefaults(selected_variables, combine_type, options);
if (js_gui_defaults === null)
return;
gui_header_string =
`
${gui_header_string}
${js_gui_defaults}
`;
// 3 Combine code for GUI
let final_gui_string = gui_header_string + '\r\n\r\n' + JS_GUI_COMMON;
// 4 Combine code for R
let final_r_string = r_header_string + '\r\n\r\n' + R_CODE_COMMON;
// 5 Get GUI selections
let variable_guids = selected_variables.map(v => v.guid);
let gui_selections = {'formVariables': variable_guids.join(';')};
if (by_pattern) {
gui_selections['formOutcomeVariable'] = pattern_variable.guid;
gui_selections['formAllowableCombineOption'] = options.allowed_merges;
var check_unordered_categories = options.allowed_merges.startsWith('Adjacent categories');
if (check_unordered_categories) {
includeWeb('JavaScript Text Analysis Functions');
let original_categories = getRFactorLevelsFromQuestionOrVariable(selected_variables[0])['labels'];
let identified_unordered_categories = findNonSortableCategoriesAsIndex(original_categories);
let prompt_msg = 'Choose any categories that are considered unordered';
let missing_values_considered = options.allowed_merges.endsWith('missing value code');
let help_url = 'Create New Variables - Automatically Combine Categories - By Pattern (CHAID) - ' +
'Adjacent Categories'
if (missing_values_considered) {
prompt_msg += ' (in addition to any categories whose Missing Values are set to ' +
' \'Include in percentages (but not averages)\')';
help_url += ' Unless Missing Value Code';
}
var unordered_categories = selectMany(prompt_msg, original_categories,
help_url,
identified_unordered_categories);
unordered_categories = unordered_categories.map(idx => original_categories[idx]);
let safe_delimiter = determineDelimiterToUse(unordered_categories);
unordered_categories = unordered_categories.join(safe_delimiter);
gui_selections['formUnorderedCategories'] = unordered_categories;
}
}
let geography_inputs;
if (by_geography) {
geography_inputs = controlInputsForGeography(selected_variables[0], options.region, options.output_type);
// IP address input detected and new variable already created
if (geography_inputs === 'IP')
return;
}
// 6 Create variable and set focus
let new_v_name = preventDuplicateVariableName(data_file, 'combined_' + makeid(), '_' );
let new_q_name = preventDuplicateQuestionName(data_file, 'Combined categories from ' + selected_variables.map(v => v.label).join(' + '));
let last = selected_variables[selected_variables.length - 1];
let new_question;
if (test_mode) {
let group = is_displayr ? project.currentPage() : project.report;
let new_output = group.appendR(final_r_string);
new_output.codeForGuiControls = final_gui_string;
new_output.setGuiControlInputRaw('formVariables', variable_guids.join(';'));
if (by_pattern) {
new_output.setGuiControlInputRaw('formOutcomeVariable', pattern_variable.guid);
new_output.setGuiControlInputRaw('formAllowableCombineOption', options.allowed_merges);
if (check_unordered_categories)
new_output.setGuiControlInputRaw('formUnorderedCategories', unordered_categories);
}
return true;
}
if (by_geography) {
try {
new_question = data_file.newRQuestion(final_r_string,
new_q_name, new_v_name,
last, final_gui_string, geography_inputs);
new_question.questionType = 'Pick One';
insertAtHoverButtonIfShown(new_question);
project.report.setSelectedRaw(new_question.variables);
} catch (e) {
let structure_name = correctTerminology('variable set');
log('Sorry, an error occurred merging the geographic data in this ' +
structure_name + ' : ' + e);
return false;
}
} else if (by_value) {
try {
new_question = data_file.newRQuestion(final_r_string, new_q_name, new_v_name, last, final_gui_string, gui_selections);
let structure = (selected_variables.length > 1) ? 'Ordinal - Multi' : 'Ordinal';
insertAtHoverButtonIfShown(new_question)
new_question.variableSetStructure = structure;
project.report.setSelectedRaw(new_question.variables);
} catch (e) {
log("" + e); // Coerce to a string: Q/Displayr interprets e as an object rather than a string and does not print it correctly
return false;
}
} else {
try {
new_question = data_file.newRQuestion(final_r_string, new_q_name, new_v_name,
last, final_gui_string, gui_selections);
insertAtHoverButtonIfShown(new_question);
new_question.variableSetStructure = 'Nominal';
project.report.setSelectedRaw(new_question.variables);
} catch(e) {
let structure_name = correctTerminology('variable set');
log('Sorry, an error occurred combining the categories by pattern using the selected ' +
structure_name + ' : ' + e);
return false;
}
}
if (!is_displayr && !test_mode) {
let new_table = project.report.appendTable();
new_table.primary = new_question;
project.report.setSelectedRaw([new_table]);
}
}