QScript Functions for Model Simulator

From Q
Jump to navigation Jump to search
This page is currently under construction, or it refers to features which are under development and not yet available for use.
This page is under construction. Its contents are only visible to developers!
This page is currently under construction, or it refers to features which are under development and not yet available for use.
This page is under construction. Its contents are only visible to developers!
includeWeb('QScript Utility Functions');
includeWeb('QScript Selection Functions');
includeWeb('QScript R Output Functions');
includeWeb('JavaScript Utilities');
includeWeb('QScript Functions for Calculations');


function truncateStringWithElipses(string, max_chars) {
    if (string.length < max_chars) //No truncation necessary
        return string;

    return string.substring(0, max_chars - 4) + '...';
}

// For single-variable question, just use variable label, truncating if necessary
// For multiple-variable questions:
// - If concactenation of labels is short enough, return the concatentation
// - If the you can truncate the question name sensibly and concatenate then do so
// - Otherwise if truncating the question name would leave too few characters,
//   just return the label.
function determinePredictorLabel(variable, max_chars = 100) {
    let single_variable = ["PickOneMulti", "PickAnyGrid", "NumberMulti", "NumberGrid", "PickAny"].indexOf(variable.questionType) == -1;
    let var_label = variable.label;
    let q_name = variable.question;
    if (single_variable)
        return truncateStringWithElipses(var_label, max_chars);
    if (var_label.length + q_name.length + 2 < max_chars)
        return q_name + ': ' + var_label;
    if (var_label.length > (max_chars - 10))
        return var_label;
    return truncateStringWithElipses(q_name, max_chars - var_label.length - 5) + ': ' + var_label;
}

function getDesiredBoxHeight(page, font_size) {
    let temp_box = page.appendText();
    let temp_html = Q.htmlBuilder();
    temp_html.appendParagraph('temp', {size: font_size});
    temp_box.content = temp_html;
    let desired_height = temp_box.height;
    temp_box.deleteItem();
    return desired_height;
}

function resizeTextBoxToMinWidth(textbox, desired_height, max_width = 1000) {
    while (textbox.height <= desired_height) {
        textbox.width = textbox.width / 2;
    }
    let increment = 5;
    while (textbox.height > desired_height && textbox.width < max_width)
        textbox.width = textbox.width + increment;
    // Just to be safe in other browsers?
    // JW observed wrapping in safari without
    // the next line.
    textbox.width = textbox.width + increment;
}

// test_mode = true allows this to be run in a QScript test
function createPredictiveModelSimulator(test_mode = false) {
    let user_selections = getAllUserSelections();
    if (test_mode) {
        // Model should be at the base of the report for the test recording
        user_selections = {selected_r_outputs: [project.report.subItems[0]] };
    }
    let bad_selection_message = 'No Regression or Machine Learning model has been selected, please select ' +
                                'a model to use this feature';
    if (user_selections.selected_r_outputs.length === 0) {
        log(bad_selection_message);
        return false;
     }
    let selected_item = user_selections.selected_r_outputs[0];
    let classes = selected_item.outputClasses;
    let regression_selected = classes.includes('Regression');
    if (!regression_selected && !classes.includes('MachineLearning')) {
        log(bad_selection_message);
        return false;
    }
    // Valid input given
    let is_stacked_regression = regression_selected && isStackedRegression(selected_item);
    if (is_stacked_regression) {
        log("Stacked regression models are not supported by the simulator");
        return false;
    }
    // Ensure model has been computed recently and so has required attributes
    let model_names = selected_item.data.getAttribute([], "names")
    if (model_names.indexOf("estimation.data.template") == -1) {
        log("Recompute the selected model before using this script")
        return;
    }

    let warning = selected_item.warning;
    if (/categories do not appear in the data/.test(warning)) {
        log('Some categories are not present in the data used to construct the model so it is not possible ' +
            'to use the simulator to predict outcomes for those categories.');
    }

    const current_page = project.currentPage();
    let parent = test_mode ? project.report : current_page.group;

    // Create a new page
    const page_name = 'Simulator';
    let new_page = parent.appendPage('TitleOnly');

    if (!test_mode)
        parent.moveAfter(new_page, current_page);
    let title_text = new_page.subItems[0];
    new_page.name = page_name;
    project.report.setSelectedRaw([new_page]);
    title_text.text = page_name;

    // Specify the heights, widths, and padding
    // for the items which will be laid out on
    // the page

    let big_padding = 60;
    let small_padding = 10;
    let big_h_pad = 35;
    let small_h_pad = 10;
    const title_bottom = title_text.top + title_text.height;

    let control_width = 180;
    let output_width = 250;

    let bottom_last = title_bottom + big_padding + 25;

    const text_height = getDesiredBoxHeight(new_page, 10);
    let max_char_width = 75;
    if (new_page.width > 1030) {
        let expansion = (new_page.width / 1026);
        max_char_width = expansion * max_char_width;
        big_h_pad = expansion * big_h_pad;
        output_width = expansion * output_width;
        control_width = expansion * control_width;
    }
    let final_left = new_page.width - (output_width + big_h_pad);


    // Go through predictors, identify what type of variable each is, and map onto
    // a control with the relevant values.

    let model_variables = getModelVariableInfoFromTemplate(selected_item);
    let predictors = model_variables.filter(x => !x.is_outcome);

    // let outcome = selected_item.getInput('formOutcomeVariable');
    // let predictors = selected_item.getInput('formPredictorVariables');
    let is_CART_model = classes.indexOf('CART') > -1;
    let abbreviated_predictors = is_CART_model && selected_item.getInput('formPredictorCategoryLabels') !== 'Full labels';
    let abbreviated_outcome = is_CART_model && selected_item.getInput('formOutcomeCategoryLabels') !== 'Full labels';

    let data_lines = [];
    let control_rows = [];
    let categorical_variable_names = [];
    let categorical_combo_names = [];
    let categorical_variable_labels = [];
    let includes_categorical_predictors = false;

    predictors.forEach(function (variable, ind) {
        let is_numeric = variable.is_numeric; // Determined by QuestionType of variable
        let is_binary = variable.is_binary;
        if (!is_numeric && !is_binary)
            includes_categorical_predictors = true;

        let text_label = appendPredictorLabel(new_page, variable, max_char_width, bottom_last, small_padding);
        resizeTextBoxToMinWidth(text_label, text_height, 1000);

        let v_name = variable.name;

        let control_selected_value = variable.default_value;
        let items = variable.levels;

        let new_control = appendPredictorControl(new_page,
                                                 test_mode,
                                                 is_numeric,
                                                 text_label,
                                                 small_padding,
                                                 control_width,
                                                 ind,
                                                 control_selected_value);

        if (!is_numeric) {
            new_control.itemList = items;
            new_control.selectedItems = [control_selected_value];
            if (!is_binary) {
                categorical_combo_names.push(new_control.name);
                categorical_variable_names.push(v_name);
                categorical_variable_labels.push(variable.label);
            }
            data_lines.push(getDataLineForCategoricalPredictor(is_binary, v_name, abbreviated_predictors, new_control, variable, is_CART_model, items))
        } else {
            new_control.text = control_selected_value.toString();
            data_lines.push('"' + v_name + '" = as.numeric(' + new_control.name + ')');
        }
        bottom_last = text_label.top + text_label.height;
        control_rows.push( { label: text_label, control: new_control });
    });

    // Set positions of text and controls
    let label_right_anchor = new_page.width - (control_width + output_width + big_h_pad + 2 * small_h_pad);
    control_rows.forEach(function (obj) {
        obj.label.left = label_right_anchor - obj.label.width;
        obj.control.left = label_right_anchor + small_h_pad;
    });



    // Create an R output which calls predict using the input controls
    // R code for data frame from controls
    let model_name = selected_item.name;
    let categorical_predictors_code = categoricalPredictorsBlock(model_name,
                                                                 categorical_variable_names,
                                                                 categorical_variable_labels,
                                                                 categorical_combo_names,
                                                                 includes_categorical_predictors,
                                                                 is_CART_model,
                                                                 classes,
                                                                 predictors)
    let df_code = dataFrameBlock(data_lines);
    let probabilities_code = probabilitiesBlock();

    if (abbreviated_outcome || abbreviated_predictors)
    {
        log('Abbreviated levels were used in the CART model, and these will appear in the simulator. If you want to show the full category labels,'
        + " please make sure to set both Inputs > Predictor category labels and Inputs > Outcome category labels to 'Full labels'.");
    }

    let is_numeric_outcome = determineIfModelHasNumericOutcomeVariable(classes, selected_item);
    let prediction_text = appendPredictionText(new_page, is_numeric_outcome, title_bottom, big_padding, output_width, final_left);
    let predict_call_code = predictCallBlock(is_numeric_outcome ? '"vector"' : '"class"');
    let final_code = categorical_predictors_code + df_code + '\r\n' + predict_call_code;



    if (test_mode) {
        testModeReport(classes, control_rows, final_code);
    }

    let prediction_output = new_page.appendR(final_code);
    prediction_output.update();
    if (!test_mode)
        arrangePredictionOutput(prediction_output, prediction_text, small_padding, output_width, final_left);
    prediction_output.referenceName = generateUniqueRObjectReferenceName('model.predicted.outcome');

    if (!is_numeric_outcome) {
        let probability_text = appendProbabilityText(new_page, prediction_output, small_padding, final_left)
        let probability_output = new_page.appendR(categorical_predictors_code + df_code + '\r\n' + probabilities_code);
        probability_output.referenceName = generateUniqueRObjectReferenceName('model.predicted.probabilities');
        probability_output.update();
        if (!test_mode)
            arrangeProbabilityOutput(probability_output, probability_text, final_left, small_padding, output_width);
    }
}

function createFakeControl(type, ind, value) {
    let control_name = "fakeControl_" + ind;
    let new_control = {type: type, name: control_name};
    let r_output = project.report.appendR(control_name + " <- '" + value + "'");
    return new_control;
}

function printObject(o) {
  var out = '';
  for (var p in o) {
    out += p + ': ' + o[p] + '\r\n';
  }
  log(out);
}

// Generate the code which begins the R outputs.
// The main purpose is to organize information
// about categorical predictors in the model.
function categoricalPredictorsBlock(model_name,
                                    categorical_variable_names,
                                    categorical_variable_labels,
                                    categorical_combo_names,
                                    includes_categorical_predictors,
                                    is_CART_model,
                                    classes,
                                    variables) {
    let categorical_predictors_code = 'input.model = ' + model_name + '\r\n';
    if (includes_categorical_predictors) {
        categorical_predictors_code +=
`all.combo.boxes <- c(${categorical_combo_names.join(', ')})
estimation.data.template <- input.model$estimation.data.template
all.variable.names <- names(estimation.data.template)
outcome.name <- attr(estimation.data.template, "outcome.name")
all.predictor.names <- setdiff(all.variable.names, outcome.name)
predictor.templates <- estimation.data.template[all.predictor.names]
all.predictor.labels <- vapply(predictor.templates, "[[", character(1L), "label")
xlevels <- lapply(predictor.templates, FUN = function (this.var) {
    if (!is.null(this.var[["observed.short.levels"]]))
        return(this.var[["observed.short.levels"]])
    if (inherits(input.model, "LDA"))
        return(this.var[["levels"]])
    this.var[["observed.levels"]]
})
names(xlevels) <- all.predictor.names
xlevels <- Filter(length, xlevels)
all.predictor.names <- names(xlevels)
levels.not.in.data <- mapply(function(var.name, combo.name) !combo.name %in% xlevels[[var.name]],
                            all.predictor.names, all.combo.boxes)
if (any(levels.not.in.data)) {
    relevant.combos <- all.combo.boxes[levels.not.in.data]
    relevant.vars <- all.predictor.names[levels.not.in.data]
    matches <- mapply(function(var.name, combo.name) vapply(xlevels[[var.name]], function(x) sub(combo.name, "", x), character(1L)),
                      relevant.vars, relevant.combos, SIMPLIFY = FALSE)
    white.space.diffs <- lapply(matches, function(x) grepl("^\\\\s+$", x))
    if (any(unlist(white.space.diffs))) {
        var.to.use <- which.max(vapply(white.space.diffs, any, logical(1L)))
        stop("This feature is not compatible with category labels that have surrounding white space. ",
             "To use this feature the category ", dQuote(xlevels[[names(var.to.use)]][white.space.diffs[[var.to.use]]]),
             " needs to be renamed to ", dQuote(relevant.combos[var.to.use]), " in the predictor variable ",
             sQuote(all.predictor.labels[levels.not.in.data][var.to.use]))
    }
    first.bad.level = which.max(levels.not.in.data)
    stop("The selected category ", dQuote(all.combo.boxes[first.bad.level]), " in the predictor variable ",
         sQuote(all.predictor.labels[first.bad.level]), " is not observed in the dataset used to construct the model ",
         "and consequently the model cannot predict the outcome. Please select a different category for this ",
         "predictor variable.")
}
`;
    }
    return categorical_predictors_code;

}

// Generate the R code which defines the data frame containing
// the values for the desired prediction.
function dataFrameBlock(data_lines) {
    return 'DF = data.frame(' + data_lines.join(',\r\n\t') + ', check.names = FALSE)';
}

// Generate the remainder of the R code required for
// computing probabilities
function probabilitiesBlock() {
    return `
library(flipRegression)
library(flipData)
library(flipTransformations)
model.classes = class(input.model)
if ("SupportVectorMachine" %in% model.classes) {
    svm.probs <- e1071:::predict.svm(input.model$original, newdata = DF, probability = TRUE)
    new.probs <- attr(svm.probs, "probabilities")
} else if ("RandomForest" %in% model.classes) {
    new.probs <- flipMultivariates:::randomForestExtractVariables(input.model, "prob", newdata = DF)
} else if ("DeepLearning" %in% model.classes) {
    if (reticulate::py_is_null_xptr(input.model$original))
        input.model$original <- keras::unserialize_model(input.model$original.serial)
    X <- as.matrix(flipTransformations::AsNumeric(DF))
    constants <- input.model$training.stdevs == 0
    if (input.model$normalize)
        X[, !constants] <- scale(X[, !constants, drop = FALSE],
                                 center = input.model$training.means[!constants],
                                 scale = input.model$training.stdevs[!constants])
    new.probs <- predict(input.model$original, X)
    if (length(input.model$outcome.levels) == 2)
        new.probs <- cbind(1 - new.probs,new.probs)
    colnames(new.probs) <- input.model$outcome.levels
} else if ("CART" %in% model.classes) {
    new.probs <- tryCatch(rpart:::predict.rpart(input.model, newdata = DF, type = "prob"),
         error = function(e){
             if (grepl("new level", e$message))
                 stop("Cannot match categories. Please set Inputs > Predictor category labels to \'Full labels\' in the CART model.")
             else
                 e
         })
} else if ("GradientBoost" %in% model.classes) {
    new.probs <- flipMultivariates:::predict.GradientBoost(input.model, newdata = DF, keep.soft.probs = TRUE)
    if (length(new.probs) == 1L)
        new.probs <- cbind(1 - new.probs, new.probs)
    colnames(new.probs) <- input.model$outcome.levels
} else if ("LDA" %in% model.classes) {
    DF <- AsDataFrame(DF, use.names = TRUE,
                         ignore.columns = "",
                         categorical.as.binary = TRUE,
                         remove.first = TRUE)
    new.probs <- flipMultivariates:::ldaExtractVariables(input.model, "posterior", input.model$prior, newdata = DF, na.action = na.pass)
} else if ("BinaryLogitRegression" %in% model.classes) {
    new.probs <- Probabilities(input.model, newdata = DF)
} else if ("MultinomialLogitRegression" %in% model.classes) {
    new.probs <- flipRegression:::Probabilities.Regression(input.model, newdata = DF)
    if (nrow(new.probs) == 1L)
        colnames(new.probs) <- input.model$original$lev
    else
        new.probs <- new.probs[, 2, drop = FALSE]
} else if ("OrderedLogitRegression" %in% model.classes) {
    new.probs <- flipRegression:::Probabilities.Regression(input.model, newdata = rbind(DF, DF)) #Doesn\'t like new data with a single row
    new.probs <- as.matrix(new.probs[1, , drop = FALSE])
}

if (ncol(new.probs) > 1)
    new.probs <- t(new.probs)
colnames(new.probs) <- "Probability (%)"
new.probs <- new.probs * 100
${generateUniqueRObjectReferenceName('predictions')} <- new.probs
`;
}

// Generatre the remainder of the code required for
// computing the predicted outcome value
function predictCallBlock(vector_or_class) {
    return `
is.cart <- inherits(input.model, "CART")
is.lda <- inherits(input.model, "LDA")
prediction.function <- predict
if (is.lda) {
    DF <- AsDataFrame(DF, use.names = TRUE,
                      ignore.columns = "",
                      categorical.as.binary = TRUE,
                      remove.first = TRUE)
}
arguments <- list(input.model, newdata = DF)
if (is.cart) {
   prediction.function <- rpart:::predict.rpart
   type <- ${vector_or_class}
   arguments <- c(arguments, type = type)
}
if (is.lda) {
    prediction.function <- flipMultivariates:::ldaExtractVariables
    arguments$type <- "class"
    arguments$prior <- input.model$prior
    arguments$na.action <- na.pass
}
as.vector(do.call(prediction.function, arguments))`;
}

// Append the "title" for the predicted value as a text box.
function appendPredictionText(new_page, is_numeric_outcome, title_bottom, big_padding, output_width, final_left) {
    let prediction_text = new_page.appendText();
    prediction_text.text = 'Predicted ' + (is_numeric_outcome ? 'value' : 'category');
    prediction_text.top = title_bottom + big_padding;
    prediction_text.width = output_width;
    prediction_text.left = final_left + 10;
    return prediction_text;
}

// Append a text box to the page which labels one of the predictors
// This will be placed beside a Control for the predictor.
function appendPredictorLabel(new_page, variable, max_char_width, bottom_last, small_padding) {
    let text_label = new_page.appendText();
    let variable_label = determinePredictorLabel(variable, max_char_width);
    let html = Q.htmlBuilder();
    html.appendParagraph(variable_label, { size: 10 });
    text_label.content = html;
    text_label.top = bottom_last + small_padding;
    text_label.left = small_padding;
    return text_label;
}

// Append a control to the page for a predictor
function appendPredictorControl(new_page, test_mode, is_numeric, text_label, small_padding, control_width, ind, value) {
    let control_type = is_numeric ? 'Textbox' : 'Combobox';
    let new_control = test_mode ? createFakeControl(control_type, ind, value) : new_page.appendControl(control_type);
    new_control.top = text_label.top - 2;
    new_control.left = text_label.left + text_label.width + small_padding;
    new_control.width = control_width;
    if (!is_numeric)
        new_control.selectionMode = 'SingleSelection';
    return new_control;
}

// Identify the items which will be available in a combo box for
// a predictor
function getComboBoxItemsForCategoricalPredictor(is_binary, abbreviated_predictors, selected_item, variable, v_name) {
    let items;
    if (is_binary) {
        items = ['0', '1'];
    } else if (abbreviated_predictors) {
        items = selected_item.data.getAttribute(['model', v_name], 'levels');
    } else {
        items = determineCategoricalLabelsForTextbox(variable)['labels'];
    }
    return items;
}

// Generate the R code which will define a column of the data frame
// for a predictor.
function getDataLineForCategoricalPredictor(is_binary, v_name, abbreviated_predictors, new_control, variable, is_CART_model, items) {
    let line;
    if (!is_binary) {
        let ordered = variable.ordered;
        line = `"${v_name}" = factor(${new_control.name}, levels = xlevels[["${v_name}"]], ordered = ${ordered})`;
    } else
        // line = stringToRName(v_name) + ' = as.integer(' + new_control.name + ')';
        line = '"' + v_name + '" = as.integer(' + new_control.name + ')';
    return line;
}

// Determine if the model has a numeric outcome variable
function determineIfModelHasNumericOutcomeVariable(classes, selected_item) {
    let is_ml = classes.indexOf('MachineLearning') > -1;
    let is_numeric_outcome = false;
    if (is_ml) {
        is_numeric_outcome = selected_item.data.get('numeric.outcome') != null && selected_item.data.get('numeric.outcome')[0];
    } else {
        is_numeric_outcome = !(classes.indexOf('BinaryLogitRegression') > 0 || classes.indexOf('MultinomialLogitRegression') > 0 || classes.indexOf('OrderedLogitRegression') > 0);
    }
    return is_numeric_outcome;
}

// Generate the report when running in test mode
function testModeReport(classes, control_rows, final_code) {
    log(classes.join("\r\n"));
    log("\r\n")
    control_rows.forEach(function(row) {
        printObject(row.control);
    })
    log(final_code);
}

// Position and re-size the R output which displays the
// predicted value.
function arrangePredictionOutput(prediction_output, prediction_text, small_padding, output_width, final_left) {
    prediction_output.top = prediction_text.top + prediction_text.height + small_padding;
    prediction_output.width = output_width;
    prediction_output.left = final_left;
    prediction_output.height = 6 * small_padding;
}

// Add the "title" for the probabilities output
// which is a text box.
function appendProbabilityText(new_page, prediction_output, small_padding, final_left) {
    let probability_text = new_page.appendText();
    probability_text.text = 'Probabilities';
    probability_text.top = prediction_output.top + prediction_output.height + small_padding;
    probability_text.left = final_left + 10;
    probability_text.height = 2 * small_padding;
    return probability_text;
}

// Position and re-size the R output which displays the probabilities for
// each value of the outcome.
function arrangeProbabilityOutput(probability_output, probability_text, final_left, small_padding, output_width) {
    probability_output.left = final_left;
    probability_output.top = probability_text.top + probability_text.height + small_padding;
    probability_output.width = output_width;
    probability_output.height = 350;
}

function getModelVariableInfoFromTemplate(model) {
    let is_CART_model = model.outputClasses.indexOf('CART') > -1;
    let variable_names = model.data.getAttribute("estimation.data.template", "names");
    let outcome_name = model.data.getAttribute("estimation.data.template", "outcome.name");
    let variable_data = variable_names.map(function(v) {
        let type = model.data.get(["estimation.data.template", v, "type"])[0];
        let levels;
        let has_unobserved_levels = false;
        let ordered = "FALSE";
        let default_value = model.data.get(["estimation.data.template", v, "default.value"])[0];
        if (type == "factor") {
            let levels_shortened = (is_CART_model && model.data.get(["estimation.data.template", v, "levels.shortened"])[0])
            has_unobserved_levels = model.data.get(["estimation.data.template", v, "has.unobserved.levels"])[0]
            levels = model.data.get(["estimation.data.template", v, levels_shortened ? "observed.short.levels" : "observed.levels"]);
            ordered = model.data.get(["estimation.data.template", v, "ordered"])[0] ? "TRUE" : "FALSE";
            default_value = levels[0];
        }
        let question_type = model.data.get(["estimation.data.template", v, "questiontype"])[0];
        return {name: v, type: type, levels: levels,
                default_value: default_value,
                is_outcome: v == outcome_name,
                label: model.data.get(["estimation.data.template", v, "label"])[0],
                question: model.data.get(["estimation.data.template", v, "question"])[0],
                questionType: question_type,
                ordered: ordered,
                has_unobserved_levels: has_unobserved_levels,
                is_binary: question_type.indexOf("PickAny") == 0,
                is_numeric: question_type.indexOf("Number") == 0};
    });
    return variable_data;
}