QScript Functions for Model Simulator
Jump to navigation
Jump to search
This page is currently under construction, or it refers to features which are under development and not yet available for use.
This page is under construction. Its contents are only visible to developers!
This page is currently under construction, or it refers to features which are under development and not yet available for use.
This page is under construction. Its contents are only visible to developers!
includeWeb("QScript R Output Functions");
includeWeb("QScript Selection Functions");
function truncateStringWithElipses(string, max_chars) {
// There may be no truncation necessary
return string.length < max_chars ? string : (string.substring(0, max_chars - 4) + '...');
}
// For single-variable question, just use variable label, truncating if necessary
// For multiple-variable questions:
// - If concactenation of labels is short enough, return the concatentation
// - If the you can truncate the question name sensibly and concatenate then do so
// - Otherwise if truncating the question name would leave too few characters,
// just return the label.
function determinePredictorLabel(variable, max_chars = 100) {
const single_variable = ["PickOneMulti", "PickAnyGrid", "NumberMulti", "NumberGrid", "PickAny"].indexOf(variable.questionType) == -1;
const var_label = variable.label;
const q_name = variable.question;
if (single_variable) {
return truncateStringWithElipses(var_label, max_chars);
}
if (var_label.length + q_name.length + 2 < max_chars) {
return q_name + ': ' + var_label;
}
if (var_label.length > (max_chars - 10)) {
return var_label;
}
return truncateStringWithElipses(q_name, max_chars - var_label.length - 5) + ': ' + var_label;
}
function getDesiredBoxHeight(page, font_size) {
const temp_box = page.appendText();
const temp_html = Q.htmlBuilder();
temp_html.appendParagraph('temp', { size: font_size });
temp_box.content = temp_html;
const desired_height = temp_box.height;
temp_box.deleteItem();
return desired_height;
}
const _resizeTextBoxToMinWidthForTesting = resizeTextBoxToMinWidth;
function resizeTextBoxToMinWidth(textbox, desired_height, max_width = 1000) {
// Reducing the width of a textbox with text in it will eventually cause the text to wrap, increasing the height
// Need `width > 0` guard against text boxes with no text in them (& mock text boxes), which will not wrap
while (textbox.height <= desired_height && textbox.width > 0) {
textbox.width = textbox.width / 2;
}
const increment = 5;
while (textbox.height > desired_height && textbox.width < max_width) {
textbox.width = textbox.width + increment;
}
// Just to be safe in other browsers?
// JW observed wrapping in safari without
// the next line.
textbox.width = textbox.width + increment;
}
// test_mode = true allows this to be run in a QScript test
function createPredictiveModelSimulator(test_mode = false) {
let { selected_r_outputs } = getAllUserSelections();
if (test_mode) {
// Model should be at the base of the report for the test recording
selected_r_outputs = [project.report.subItems[0]];
}
const bad_selection_message = 'No Regression or Machine Learning model has been selected, please select ' +
'a model to use this feature';
if (selected_r_outputs.length === 0) {
log(bad_selection_message);
return false;
}
const selected_item = selected_r_outputs[0];
const classes = selected_item.outputClasses || [];
const regression_selected = classes.includes('Regression');
if (!regression_selected && !classes.includes('MachineLearning')) {
log(bad_selection_message);
return false;
}
// Valid input given
const is_stacked_regression = regression_selected && isStackedRegression(selected_item);
if (is_stacked_regression) {
log("Stacked regression models are not supported by the simulator");
return false;
}
// Ensure model has been computed recently and so has required attributes
const model_names = selected_item.data.getAttribute([], "names");
if (model_names.indexOf("estimation.data.template") == -1) {
log("Recompute the selected model before using this script");
return;
}
const warning = selected_item.warning || '';
if (/categories do not appear in the data/.test(warning)) {
log('Some categories are not present in the data used to construct the model so it is not possible ' +
'to use the simulator to predict outcomes for those categories.');
}
const current_page = project.currentPage();
const parent = test_mode || current_page === null ? project.report : current_page.group;
// Create a new page
const page_name = 'Simulator';
const new_page = parent.appendPage('TitleOnly');
if (!test_mode) {
parent.moveAfter(new_page, current_page);
}
const title_text = new_page.subItems[0];
new_page.name = page_name;
project.report.setSelectedRaw([new_page]);
title_text.text = page_name;
// Specify the heights, widths, and padding
// for the items which will be laid out on
// the page
const big_padding = 60;
const small_padding = 10;
let big_h_pad = 35;
const small_h_pad = 10;
const title_bottom = title_text.top + title_text.height;
let control_width = 180;
let output_width = 250;
let bottom_last = title_bottom + big_padding + 25;
const text_height = getDesiredBoxHeight(new_page, 10);
let max_char_width = 75;
if (new_page.width > 1030) {
const expansion = (new_page.width / 1026);
max_char_width = expansion * max_char_width;
big_h_pad = expansion * big_h_pad;
output_width = expansion * output_width;
control_width = expansion * control_width;
}
const final_left = new_page.width - (output_width + big_h_pad);
// Go through predictors, identify what type of variable each is, and map onto
// a control with the relevant values.
const model_variables = getModelVariableInfoFromTemplate(selected_item);
const predictors = model_variables.filter(x => !x.is_outcome);
// let outcome = selected_item.getInput('formOutcomeVariable');
// let predictors = selected_item.getInput('formPredictorVariables');
const is_CART_model = classes.indexOf('CART') > -1;
const abbreviated_predictors = is_CART_model && selected_item.getInput('formPredictorCategoryLabels') !== 'Full labels';
const abbreviated_outcome = is_CART_model && selected_item.getInput('formOutcomeCategoryLabels') !== 'Full labels';
const data_lines = [];
const control_rows = [];
const categorical_variable_names = [];
const categorical_combo_names = [];
const categorical_variable_labels = [];
let includes_categorical_predictors = false;
predictors.forEach(function (variable, ind) {
const is_numeric = variable.is_numeric; // Determined by QuestionType of variable
const is_binary = variable.is_binary;
if (!is_numeric && !is_binary) {
includes_categorical_predictors = true;
}
const text_label = appendPredictorLabel(new_page, variable, max_char_width, bottom_last, small_padding);
resizeTextBoxToMinWidth(text_label, text_height, 1000);
const v_name = variable.name;
const control_selected_value = variable.default_value;
const items = variable.levels;
const new_control = appendPredictorControl(new_page, test_mode, is_numeric, text_label, small_padding, control_width, ind, control_selected_value);
if (!is_numeric) {
new_control.itemList = items;
new_control.selectedItems = [control_selected_value];
if (!is_binary) {
categorical_combo_names.push(new_control.name);
categorical_variable_names.push(v_name);
categorical_variable_labels.push(variable.label);
}
data_lines.push(getDataLineForCategoricalPredictor(is_binary, v_name, abbreviated_predictors, new_control, variable, is_CART_model, items));
}
else {
new_control.text = control_selected_value.toString();
data_lines.push('"' + v_name + '" = as.numeric(' + new_control.name + ')');
}
bottom_last = text_label.top + text_label.height;
control_rows.push({ label: text_label, control: new_control });
});
// Set positions of text and controls
const label_right_anchor = new_page.width - (control_width + output_width + big_h_pad + 2 * small_h_pad);
control_rows.forEach(function (obj) {
obj.label.left = label_right_anchor - obj.label.width;
obj.control.left = label_right_anchor + small_h_pad;
});
// Create an R output which calls predict using the input controls
// R code for data frame from controls
const model_name = selected_item.name;
const categorical_predictors_code = categoricalPredictorsBlock(model_name, categorical_variable_names, categorical_variable_labels, categorical_combo_names, includes_categorical_predictors, is_CART_model, classes, predictors);
const df_code = dataFrameBlock(data_lines);
const probabilities_code = probabilitiesBlock();
if (abbreviated_outcome || abbreviated_predictors) {
log('Abbreviated levels were used in the CART model, and these will appear in the simulator. If you want to show the full category labels,'
+ " please make sure to set both Inputs > Predictor category labels and Inputs > Outcome category labels to 'Full labels'.");
}
const is_numeric_outcome = determineIfModelHasNumericOutcomeVariable(classes, selected_item);
const prediction_text = appendPredictionText(new_page, is_numeric_outcome, title_bottom, big_padding, output_width, final_left);
const predict_call_code = predictCallBlock(is_numeric_outcome ? '"vector"' : '"class"');
const final_code = categorical_predictors_code + df_code + '\n' + predict_call_code;
if (test_mode) {
testModeReport(classes, control_rows, final_code);
}
const prediction_output = new_page.appendR(final_code);
prediction_output.update();
if (!test_mode) {
arrangePredictionOutput(prediction_output, prediction_text, small_padding, output_width, final_left);
}
prediction_output.referenceName = generateUniqueRObjectReferenceName('model.predicted.outcome');
if (!is_numeric_outcome) {
const probability_text = appendProbabilityText(new_page, prediction_output, small_padding, final_left);
const probability_output = new_page.appendR(categorical_predictors_code + df_code + '\n' + probabilities_code);
probability_output.referenceName = generateUniqueRObjectReferenceName('model.predicted.probabilities');
probability_output.update();
if (!test_mode) {
arrangeProbabilityOutput(probability_output, probability_text, final_left, small_padding, output_width);
}
}
}
function createFakeControl(type, ind, value) {
const control_name = "fakeControl_" + ind;
const new_control = { type: type, name: control_name };
const r_output = project.report.appendR(control_name + " <- '" + value + "'");
return new_control;
}
function printObject(o) {
let out = '';
for (const p in o) {
out += p + ': ' + o[p] + '\r\n';
}
log(out);
}
// Generate the code which begins the R outputs.
// The main purpose is to organize information
// about categorical predictors in the model.
function categoricalPredictorsBlock(model_name, categorical_variable_names, categorical_variable_labels, categorical_combo_names, includes_categorical_predictors, is_CART_model, classes, variables) {
let categorical_predictors_code = 'input.model = ' + model_name + '\n';
if (includes_categorical_predictors) {
categorical_predictors_code +=
`all.combo.boxes <- c(${categorical_combo_names.join(', ')})
estimation.data.template <- input.model$estimation.data.template
all.variable.names <- names(estimation.data.template)
outcome.name <- attr(estimation.data.template, "outcome.name")
all.predictor.names <- setdiff(all.variable.names, outcome.name)
predictor.templates <- estimation.data.template[all.predictor.names]
all.predictor.labels <- vapply(predictor.templates, "[[", character(1L), "label")
xlevels <- lapply(predictor.templates, FUN = function (this.var) {
if (!is.null(this.var[["observed.short.levels"]]))
return(this.var[["observed.short.levels"]])
if (inherits(input.model, "LDA"))
return(this.var[["levels"]])
this.var[["observed.levels"]]
})
names(xlevels) <- all.predictor.names
xlevels <- Filter(length, xlevels)
all.predictor.names <- names(xlevels)
levels.not.in.data <- mapply(function(var.name, combo.name) !combo.name %in% xlevels[[var.name]],
all.predictor.names, all.combo.boxes)
if (any(levels.not.in.data)) {
relevant.combos <- all.combo.boxes[levels.not.in.data]
relevant.vars <- all.predictor.names[levels.not.in.data]
matches <- mapply(function(var.name, combo.name) vapply(xlevels[[var.name]], function(x) sub(combo.name, "", x), character(1L)),
relevant.vars, relevant.combos, SIMPLIFY = FALSE)
white.space.diffs <- lapply(matches, function(x) grepl("^\\\\s+$", x))
if (any(unlist(white.space.diffs))) {
var.to.use <- which.max(vapply(white.space.diffs, any, logical(1L)))
StopForUserError("This feature is not compatible with category labels that have surrounding white space. ",
"To use this feature the category ", dQuote(xlevels[[names(var.to.use)]][white.space.diffs[[var.to.use]]]),
" needs to be renamed to ", dQuote(relevant.combos[var.to.use]), " in the predictor variable ",
sQuote(all.predictor.labels[levels.not.in.data][var.to.use]))
}
first.bad.level = which.max(levels.not.in.data)
StopForUserError("The selected category ", dQuote(all.combo.boxes[first.bad.level]), " in the predictor variable ",
sQuote(all.predictor.labels[first.bad.level]), " is not observed in the dataset used to construct the model ",
"and consequently the model cannot predict the outcome. Please select a different category for this ",
"predictor variable.")
}
`;
}
return categorical_predictors_code;
}
// Generate the R code which defines the data frame containing
// the values for the desired prediction.
function dataFrameBlock(data_lines) {
return 'DF = data.frame(' + data_lines.join(',\n ') + ', check.names = FALSE)';
}
// Generate the remainder of the R code required for
// computing probabilities
function probabilitiesBlock() {
return `
library(flipRegression)
library(flipData)
library(flipTransformations)
model.classes = class(input.model)
if ("SupportVectorMachine" %in% model.classes) {
svm.probs <- e1071:::predict.svm(input.model$original, newdata = DF, probability = TRUE)
new.probs <- attr(svm.probs, "probabilities")
} else if ("RandomForest" %in% model.classes) {
new.probs <- flipMultivariates:::randomForestExtractVariables(input.model, "prob", newdata = DF)
} else if ("DeepLearning" %in% model.classes) {
if (reticulate::py_is_null_xptr(input.model$original))
input.model$original <- keras::unserialize_model(input.model$original.serial)
X <- as.matrix(flipTransformations::AsNumeric(DF))
constants <- input.model$training.stdevs == 0
if (input.model$normalize)
X[, !constants] <- scale(X[, !constants, drop = FALSE],
center = input.model$training.means[!constants],
scale = input.model$training.stdevs[!constants])
new.probs <- predict(input.model$original, X)
if (length(input.model$outcome.levels) == 2)
new.probs <- cbind(1 - new.probs,new.probs)
colnames(new.probs) <- input.model$outcome.levels
} else if ("CART" %in% model.classes) {
new.probs <- tryCatch(rpart:::predict.rpart(input.model, newdata = DF, type = "prob"),
error = function(e){
if (grepl("new level", e$message))
StopForUserError("Cannot match categories. Please set Inputs > Predictor category labels to 'Full labels' in the CART model.")
else
e
})
} else if ("GradientBoost" %in% model.classes) {
new.probs <- flipMultivariates:::predict.GradientBoost(input.model, newdata = DF, keep.soft.probs = TRUE)
if (length(new.probs) == 1L)
new.probs <- cbind(1 - new.probs, new.probs)
colnames(new.probs) <- input.model$outcome.levels
} else if ("LDA" %in% model.classes) {
DF <- AsDataFrame(DF, use.names = TRUE,
ignore.columns = "",
categorical.as.binary = TRUE,
remove.first = TRUE)
new.probs <- flipMultivariates:::ldaExtractVariables(input.model, "posterior", input.model$prior, newdata = DF, na.action = na.pass)
} else if ("BinaryLogitRegression" %in% model.classes) {
new.probs <- Probabilities(input.model, newdata = DF)
} else if ("MultinomialLogitRegression" %in% model.classes) {
new.probs <- flipRegression:::Probabilities.Regression(input.model, newdata = DF)
if (nrow(new.probs) == 1L)
colnames(new.probs) <- input.model$original$lev
else
new.probs <- new.probs[, 2, drop = FALSE]
} else if ("OrderedLogitRegression" %in% model.classes) {
new.probs <- flipRegression:::Probabilities.Regression(input.model, newdata = rbind(DF, DF)) #Doesn't like new data with a single row
new.probs <- as.matrix(new.probs[1, , drop = FALSE])
}
if (ncol(new.probs) > 1)
new.probs <- t(new.probs)
colnames(new.probs) <- "Probability (%)"
new.probs <- new.probs * 100
${generateUniqueRObjectReferenceName('predictions')} <- new.probs
`;
}
// Generatre the remainder of the code required for
// computing the predicted outcome value
function predictCallBlock(vector_or_class) {
return `
is.cart <- inherits(input.model, "CART")
is.lda <- inherits(input.model, "LDA")
prediction.function <- predict
if (is.lda) {
DF <- AsDataFrame(DF, use.names = TRUE,
ignore.columns = "",
categorical.as.binary = TRUE,
remove.first = TRUE)
}
arguments <- list(input.model, newdata = DF)
if (is.cart) {
prediction.function <- rpart:::predict.rpart
type <- ${vector_or_class}
arguments <- c(arguments, type = type)
}
if (is.lda) {
prediction.function <- flipMultivariates:::ldaExtractVariables
arguments$type <- "class"
arguments$prior <- input.model$prior
arguments$na.action <- na.pass
}
as.vector(do.call(prediction.function, arguments))`;
}
// Append the "title" for the predicted value as a text box.
function appendPredictionText(new_page, is_numeric_outcome, title_bottom, big_padding, output_width, final_left) {
const prediction_text = new_page.appendText();
prediction_text.text = 'Predicted ' + (is_numeric_outcome ? 'value' : 'category');
prediction_text.top = title_bottom + big_padding;
prediction_text.width = output_width;
prediction_text.left = final_left + 10;
return prediction_text;
}
// Append a text box to the page which labels one of the predictors
// This will be placed beside a Control for the predictor.
function appendPredictorLabel(new_page, variable, max_char_width, bottom_last, small_padding) {
const text_label = new_page.appendText();
const variable_label = determinePredictorLabel(variable, max_char_width);
const html = Q.htmlBuilder();
html.appendParagraph(variable_label, { size: 10 });
text_label.content = html;
text_label.top = bottom_last + small_padding;
text_label.left = small_padding;
return text_label;
}
// Append a control to the page for a predictor
function appendPredictorControl(new_page, test_mode, is_numeric, text_label, small_padding, control_width, ind, value) {
const control_type = is_numeric ? 'Textbox' : 'Combobox';
const new_control = test_mode ? createFakeControl(control_type, ind, value) : new_page.appendControl(control_type);
new_control.top = text_label.top - 2;
new_control.left = text_label.left + text_label.width + small_padding;
new_control.width = control_width;
if (!is_numeric) {
new_control.selectionMode = 'SingleSelection';
}
return new_control;
}
// Generate the R code which will define a column of the data frame
// for a predictor.
function getDataLineForCategoricalPredictor(is_binary, v_name, abbreviated_predictors, new_control, variable, is_CART_model, items) {
let line;
if (!is_binary) {
const ordered = variable.ordered;
line = `"${v_name}" = factor(${new_control.name}, levels = xlevels[["${v_name}"]], ordered = ${ordered})`;
}
else {
// line = stringToRName(v_name) + ' = as.integer(' + new_control.name + ')';
line = '"' + v_name + '" = as.integer(' + new_control.name + ')';
}
return line;
}
// Determine if the model has a numeric outcome variable
function determineIfModelHasNumericOutcomeVariable(classes, selected_item) {
const is_ml = classes.indexOf('MachineLearning') > -1;
let is_numeric_outcome = false;
if (is_ml) {
is_numeric_outcome = selected_item.data.get('numeric.outcome') != null && selected_item.data.get('numeric.outcome')[0];
}
else {
is_numeric_outcome = !(classes.indexOf('BinaryLogitRegression') > 0 || classes.indexOf('MultinomialLogitRegression') > 0 || classes.indexOf('OrderedLogitRegression') > 0);
}
return is_numeric_outcome;
}
// Generate the report when running in test mode
function testModeReport(classes, control_rows, final_code) {
log(classes.join("\r\n"));
log("\r\n");
control_rows.forEach(function (row) {
printObject(row.control);
});
log(final_code);
}
// Position and re-size the R output which displays the
// predicted value.
function arrangePredictionOutput(prediction_output, prediction_text, small_padding, output_width, final_left) {
prediction_output.top = prediction_text.top + prediction_text.height + small_padding;
prediction_output.width = output_width;
prediction_output.left = final_left;
prediction_output.height = 6 * small_padding;
}
// Add the "title" for the probabilities output
// which is a text box.
function appendProbabilityText(new_page, prediction_output, small_padding, final_left) {
const probability_text = new_page.appendText();
probability_text.text = 'Probabilities';
probability_text.top = prediction_output.top + prediction_output.height + small_padding;
probability_text.left = final_left + 10;
probability_text.height = 2 * small_padding;
return probability_text;
}
// Position and re-size the R output which displays the probabilities for
// each value of the outcome.
function arrangeProbabilityOutput(probability_output, probability_text, final_left, small_padding, output_width) {
probability_output.left = final_left;
probability_output.top = probability_text.top + probability_text.height + small_padding;
probability_output.width = output_width;
probability_output.height = 350;
}
function getModelVariableInfoFromTemplate(model) {
const is_CART_model = (model.outputClasses || []).indexOf('CART') > -1;
const variable_names = model.data.getAttribute("estimation.data.template", "names");
const outcome_name = model.data.getAttribute("estimation.data.template", "outcome.name");
const variable_data = variable_names.map(function (v) {
const type = model.data.get(["estimation.data.template", v, "type"])[0];
let levels;
let has_unobserved_levels = false;
let ordered = "FALSE";
let default_value = model.data.get(["estimation.data.template", v, "default.value"])[0];
if (type == "factor") {
const levels_shortened = (is_CART_model && model.data.get(["estimation.data.template", v, "levels.shortened"])[0]);
has_unobserved_levels = model.data.get(["estimation.data.template", v, "has.unobserved.levels"])[0];
levels = model.data.get(["estimation.data.template", v, levels_shortened ? "observed.short.levels" : "observed.levels"]);
ordered = model.data.get(["estimation.data.template", v, "ordered"])[0] ? "TRUE" : "FALSE";
default_value = levels[0];
}
const question_type = model.data.get(["estimation.data.template", v, "questiontype"])[0];
return {
name: v, type: type, levels: levels,
default_value: default_value,
is_outcome: v == outcome_name,
label: model.data.get(["estimation.data.template", v, "label"])[0],
question: model.data.get(["estimation.data.template", v, "question"])[0],
questionType: question_type,
ordered: ordered,
has_unobserved_levels: has_unobserved_levels,
is_binary: question_type.indexOf("PickAny") == 0,
is_numeric: question_type.indexOf("Number") == 0
};
});
return variable_data;
}