QScript Functions for Sort and Delete

From Q
Jump to navigation Jump to search
This page is currently under construction, or it refers to features which are under development and not yet available for use.
This page is under construction. Its contents are only visible to developers!
// Sorts pages so that those whose tables/plots have the lowest p-values come
// first within their parent folder.
//
// pages_to_sort: optional array of pages to sort. When omitted, the pages are
//     taken from the current user selection: either the selected pages (which
//     must share a folder) or the pages inside a single selected folder.
// log_results: when truthy, a message describing the sort is logged.
// min_p_values: optional array of p-values, index-aligned with pages_to_sort,
//     handed to sortItemsBySignificance instead of being recomputed.
//
// Returns false when there is nothing that can be sorted, otherwise undefined.
function sortPagesBySignificance(pages_to_sort, log_results,min_p_values) {
    includeWeb("QScript Selection Functions");  // for getUserSelections, recursiveGetAll
    const fail_message = "Cannot sort the selection. Select two or more pages " +
        "in the same folder or at the base level of Pages, or select a single folder of pages.";
    let parent;  // parent QScript ReportGroup of pages to sort
    if (!pages_to_sort) {
        // Kept local so that the selection does not leak into the global scope.
        const user_selections = getAllUserSelections();
        const selected_pages = user_selections.selected_top_level_pages;

        // With nothing selected there is no page to take a parent from.
        if (selected_pages.length == 0) {
            log(fail_message);
            return false;
        }

        const pages_in_same_folder = selectedPagesAreInSameFolder(user_selections);
        const one_folder_selected = exactlyNSelected(user_selections, "Top Pages", 1);
        // Check the user has selected items that can be sorted unambiguously
        if (!pages_in_same_folder && !one_folder_selected) {
            log(fail_message);
            return false;
        }

        if (one_folder_selected) { // Sort pages within folder
            parent = selected_pages[0];
            pages_to_sort = parent.subItems.filter(function (item) { return item.type == "ReportGroup"; });
            if (pages_to_sort.length < 1) {
                log(fail_message);
                return false;
            }
        } else {
            parent = selected_pages[0].parentGroup();
            pages_to_sort = selected_pages;
        }
    } else {
        // An empty list has no first page to read the parent folder from.
        if (pages_to_sort.length == 0) {
            log(fail_message);
            return false;
        }
        parent = pages_to_sort[0].parentGroup();
    }

    sortItemsBySignificance(pages_to_sort, parent, min_p_values);

    // Make a list of pages that don't have sortable objects (tables, plots)
    const unsortable_pages = pages_to_sort.filter(function (page) {
        return page.subItems.filter(function (item) {
            return ["Table", "Plot"].indexOf(item.type) > -1;
        }).length == 0;
    });

    if (log_results) {
        log("Pages have been sorted so that those containing Tables or Charts " +
            "with the most significant results (lowest p-values) are at the top.");
    }
    // This notice is logged regardless of log_results.
    if (unsortable_pages.length > 0) {
        log("Some pages did not contain any tables or charts with " +
            "p-values and have been sorted to the bottom.");
    }
    return;
}

// Reorders items inside parent so that the item with the smallest p-value
// comes first. Items whose p-values are equal (within 1e-10) keep their
// original relative order.
//
// items: array of report items (pages, tables, plots) to reorder.
// parent: the group containing the items; its moveAfter(item, after) is used
//     to reposition them.
// min_p_values: optional array of p-values, index-aligned with items. When
//     null or undefined the p-values are computed: getSmallestPValueOnPage
//     for items of type "ReportGroup", minimumPValue for everything else.
function sortItemsBySignificance(items, parent, min_p_values) {
    // Nothing to reorder; also avoids reading the first element of an empty list.
    if (items.length === 0)
        return;

    const have_p_values = min_p_values != null;
    const sort_objects = items.map(function (item, idx) {
        let smallest_p;
        if (have_p_values)
            smallest_p = min_p_values[idx];
        else if (item.type === "ReportGroup")
            smallest_p = getSmallestPValueOnPage(item);
        else
            smallest_p = minimumPValue(item);
        return { item: item, smallest_p: smallest_p, idx: idx };
    });

    sort_objects.sort(function (a, b) {
        const diff = a.smallest_p - b.smallest_p;
        // Infinity - Infinity is NaN, so two items that are both +/-Infinity
        // must be treated as a tie explicitly for the index tie-break to apply.
        if (Number.isNaN(diff) || Math.abs(diff) < 1e-10)
            return a.idx - b.idx;
        return diff;
    });

    // The first sorted item stays where it is; every following item is moved
    // directly behind its predecessor in the sorted order.
    let previous_item = sort_objects[0].item;
    sort_objects.slice(1).forEach(function (obj) {
        parent.moveAfter(obj.item, previous_item);
        previous_item = obj.item;
    });
    return;
}

// Reports whether every top-level selected page lives in the same folder.
// The parent of each page is compared, via equals(), against the parent of
// the first selected page. An empty selection yields true.
function selectedPagesAreInSameFolder(user_selections) {
    const folders = user_selections.selected_top_level_pages.map(page => page.parentGroup());
    for (let i = 0; i < folders.length; i++) {
        if (!folders[0].equals(folders[i]))
            return false;
    }
    return true;
}

// Returns the smallest p-value, as reported by minimumPValue, among the
// tables and plots that sit directly on the page. Returns Infinity when the
// page has no tables or plots, or when none of them produces a smaller value.
function getSmallestPValueOnPage(page) {
    const sortable_types = ["Table", "Plot"];
    const candidates = page.subItems.filter(function (item) {
        return sortable_types.indexOf(item.type) > -1;
    });
    return candidates.reduce(function (best_so_far, item) {
        const p = minimumPValue(item);
        return (p !== undefined && p < best_so_far) ? p : best_so_far;
    }, Infinity);
}

// Returns the smallest p-value found for a report item, used both for
// sorting by significance and for deciding what to delete.
//
// Special return values:
//   -Infinity  R Outputs that are not plots or have no nested table (so that
//              they are never deleted), and crosstabs that use the same
//              question in rows and columns (so that they sort to the top).
//   Infinity   word clouds, plots without a primary question or with a
//              tertiary one, tables whose questions have no data, and
//              tables whose p-values could not be computed.
//
// For a Plot, a temporary table with the plot's questions, weight and filters
// is appended to the report and is always deleted again before returning.
function minimumPValue(item) {
    let table = item;
    let temp_table = null;  // only set when a table is created for a Plot

    if (item.type == "R Output") {
        // Only visualizations with a nested table can be evaluated; the
        // nested table is then treated like any other table.
        if (!rItemIsPlot(item) || !rPlotHasNestedTable(item))
            return -Infinity; // So as to never delete
        table = item.subItems.filter(x => x.type == "Table")[0];
    } else if (item.type == "WordCloud") {
        return Infinity;
    } else if (item.type == 'Plot') {
        if (item.primary == null || item.tertiary != null)
            return Infinity;
        temp_table = project.report.appendTable();
        table = temp_table;
    }

    try {
        if (temp_table !== null) {
            // Mirror the plot's inputs on the temporary table
            temp_table.primary = item.primary;
            if (item.secondary != null)
                temp_table.secondary = item.secondary;
            if (item.weight != null)
                temp_table.weight = item.weight;
            if (item.filters != null)
                temp_table.filters = item.filters;
        }

        if (tableQuestionsHaveNoData(table))
            return Infinity;

        // Ensure crosstabs with the same question
        // in rows and columns appear at top (so they
        // can be easily identified and deleted).
        if (table.primary.equals(table.secondary))
            return -Infinity;

        let pvalues;
        try {
            pvalues = PValuesExcludingNETs(table);
        } catch (e) {
            // p-values unavailable: treat the item as not significant
            pvalues = Infinity;
        }
        return minWithReplacedNaN(pvalues, Infinity);
    } finally {
        // Remove the temporary table on every exit path, including the
        // early returns above.
        if (temp_table !== null)
            temp_table.deleteItem();
    }
}

// Collects the "Corrected p" statistic of a table for the rows returned by
// rowIndices(false) on the table's output (per the file's own notes, the rows
// that are not NET or SUM rows).
//
// Returns an array with one entry per such row, or Infinity when the table
// offers no "Corrected p" statistic.
function PValuesExcludingNETs(table) {
    includeWeb("QScript Table Functions");  // for getStatisticsFromTable

    const stat_name = "Corrected p";
    const table_output = table.calculateOutput();
    // Both of these are local; they must not leak into the global scope.
    const row_indices_without_net = table_output.rowIndices(false);
    const stats = getStatisticsFromTable(table, [stat_name]);
    // If there are no p-values, the item is not significant, delete.
    if (stats == null || stats[stat_name] == null)
        return Infinity;

    // remove NET and SUM rows
    const pvalues_without_net = [];
    row_indices_without_net.forEach(function (index) {
        pvalues_without_net.push(stats[stat_name][index]);
    });
    return pvalues_without_net;
}

// Reports whether a table cannot show anything because one of its questions
// has no rows: true when the primary question's data reduction has no row
// labels, or when a secondary question with a data reduction has none.
function tableQuestionsHaveNoData(table) {
    const primary_is_empty = table.primary.dataReduction.rowLabels.length == 0;
    if (primary_is_empty)
        return true;

    // The secondary question and its data reduction are both optional
    const secondary_reduction = table.secondary != null ? table.secondary.dataReduction : null;
    return secondary_reduction != null && secondary_reduction.rowLabels.length == 0;
}

// Sorts the given questions, in place, into the order in which their names
// appear in the data file of the first question, and returns the same array.
// Questions whose name is not found in that data file sort to the front.
// An empty array is returned unchanged.
function sortQuestionsInDataFileOrder(questions) {
    if (questions.length === 0)
        return questions;

    // Build the name -> position lookup once rather than searching the full
    // name list for every comparison. Only the first occurrence of a name is
    // kept, matching what indexOf would return.
    const position_by_name = new Map();
    questions[0].dataFile.questions.forEach(function (q, index) {
        if (!position_by_name.has(q.name))
            position_by_name.set(q.name, index);
    });
    const positionOf = function (q) {
        return position_by_name.has(q.name) ? position_by_name.get(q.name) : -1;
    };

    questions.sort(function (a, b) { return positionOf(a) - positionOf(b); });
    return questions;
}

// Sorts the given variables, in place, into the order in which their names
// appear in the data file of the first variable's question, and returns the
// same array. Variables whose name is not found in that data file sort to the
// front. An empty array is returned unchanged.
function sortVariablesInDataFileOrder(variables) {
    if (variables.length === 0)
        return variables;

    // Build the name -> position lookup once rather than searching the full
    // name list for every comparison. Only the first occurrence of a name is
    // kept, matching what indexOf would return.
    const position_by_name = new Map();
    variables[0].question.dataFile.variables.forEach(function (v, index) {
        if (!position_by_name.has(v.name))
            position_by_name.set(v.name, index);
    });
    const positionOf = function (v) {
        return position_by_name.has(v.name) ? position_by_name.get(v.name) : -1;
    };

    variables.sort(function (a, b) { return positionOf(a) - positionOf(b); });
    return variables;
}

// Deletes the selected tables and plots whose smallest p-value is above a
// threshold, and moves the selection to something that still exists.
//
// p_threshold: p-value above which an item is deleted. When null/undefined
//     the user is prompted for a significance level (e.g. 95) which is
//     converted to a p-value (0.05).
// log_results: whether to log a summary of what was deleted (default true).
//     The summary is only produced outside of Displayr.
// min_p_values: optional array of p-values, index-aligned with the selected
//     items that belong to a group. When null/undefined they are computed
//     with minimumPValue.
//
// Returns false when nothing suitable is selected, otherwise undefined.
function deleteInsignificantTablesPlots(p_threshold, log_results, min_p_values) {
    includeWeb("QScript Selection Functions");  // for getUserSelections, oneOrMoreSelected
    if (log_results === undefined)
        log_results = true;

    // Asks for a significance level strictly between 0 and 100 and converts
    // it to a p-value. Anything that is not such a number is asked for again.
    function getThreshold()
    {
        let sig_level = NaN;
        while (!(sig_level > 0 && sig_level < 100)) {
            sig_level = Number(prompt("Please enter the desired significance level (e.g. 95 for significance at 95%):", 95));
        }
        return Math.round((1 - sig_level/100) * 1e10) / 1e10; // corrected for numerical precision errors
    }

    const web_mode = inDisplayr();
    let selected_items = [];
    let not_evaluated_r_plots = [];
    let not_evaluated_r_items = [];
    if (web_mode) {
        const user_selections = getAllUserSelections();
        const organized_selections = organizeSelectedItemsForStatisticBasedDeletion(user_selections);
        if (!organized_selections)
            return false;
        selected_items = organized_selections.selected_items;
        not_evaluated_r_plots = organized_selections.not_evaluated_r_plots;
        not_evaluated_r_items = organized_selections.not_evaluated_r_items;
    } else {
        selected_items = selectedTablesAndPlots(project.report);
        if (selected_items.length < 1) {
            log("There are no tables or charts selected. Select one or more tables or charts.");
            return false;
        }
    }

    selected_items = selected_items.filter(item => item.group != null);

    if (p_threshold == null)
        p_threshold = getThreshold();

    if (min_p_values == null)
        min_p_values = selected_items.map(minimumPValue);

    // Split the selection into items to delete (p above the threshold) and
    // items to keep. An item whose p-value is NaN ends up in neither list.
    const deleted_items = [];
    const not_deleted = [];
    selected_items.forEach(function (item, idx) {
        const p = min_p_values[idx];
        if (p > p_threshold)
            deleted_items.push(item);
        else if (p <= p_threshold)
            not_deleted.push(item);
    });

    // In Q, just delete the selected items. On the web we need to clear out the pages
    // which are made blank by the removal of tables/plots
    // This is mainly to facilitate the Insert > Report feature being cleanable.

    const all_selected_pages = deleted_items.map(item => item.group);
    const previous_page = identifyPreviousPageToDeletions(all_selected_pages);

    const deleted_table_names = [];
    const deleted_plot_names = [];
    deleted_items.forEach(function (item) {
        try {
            (item.type === "Table" ? deleted_table_names : deleted_plot_names).push(item.name);
            item.deleteItem();
        } catch (e) {
            // Best effort: an item that cannot be deleted is skipped.
        }
    });

    let deleted_page_names = [];
    if (web_mode) {
        deleted_page_names = deleteEmptyPages(all_selected_pages);
    }

    if (log_results && !web_mode)
        logDeletionSummary(deleted_table_names, deleted_plot_names,
                           deleted_page_names, min_p_values, p_threshold);

    // If anything was deleted, move the selection to a surviving item (its
    // page when in Displayr) or, failing that, to the page before the
    // deletions.
    if (deleted_items.length > 0) {
        if (not_deleted.length > 0) {
            const target_item = web_mode ? not_deleted[0].group : not_deleted[0];
            project.report.setSelectedRaw([target_item]);
        } else
            project.report.setSelectedRaw([previous_page]);
    }

    if (not_evaluated_r_plots.length > 0)
        log("Some visualizations could not be evaluated for significance and have not been deleted.");

    if (not_evaluated_r_items.length > 0)
        log(correctTerminology("Calculations were not evaluated for significance and have not been deleted."));

    return;
}

// Deletes every page, among the given pages and the groups nested inside
// them, that contains nothing other than Text items.
//
// pages: array of pages (report groups). It may contain the same page more
//     than once; each page is only considered once and the array itself is
//     left untouched. Pages are identified by their guid.
//
// Returns the names of the pages that were found to be empty.
function deleteEmptyPages(pages) {
    // for recursiveGetAllGroupsInGroup, recursiveGetAllItemsInGroup
    includeWeb("QScript Selection Functions");

    // Keeps the first occurrence of every group, identified by guid
    function uniqueGroups(groups) {
        const seen_guids = new Set();
        return groups.filter(function (group) {
            if (seen_guids.has(group.guid))
                return false;
            seen_guids.add(group.guid);
            return true;
        });
    }

    // Work on a copy: the nested groups are appended to the list of
    // candidates, which must not alter the caller's array.
    const top_level = uniqueGroups(pages);
    let candidates = top_level.slice();
    top_level.forEach(function (page) {
        recursiveGetAllGroupsInGroup(page, candidates);
    });
    candidates = uniqueGroups(candidates);

    const empty_pages = candidates.filter(function (page) {
        const sub_items = [];
        recursiveGetAllItemsInGroup(page, sub_items);
        return sub_items.filter(item => item.type != "Text").length === 0;
    });
    const deleted_page_names = empty_pages.map(page => page.name);

    empty_pages.forEach(function (page) {
        try {
            page.deleteItem();
        } catch (e) {
            // Best effort: e.g. the page may already be gone because an
            // enclosing empty folder was deleted first.
        }
    });
    return deleted_page_names;
}

// Logs which tables, plots and pages were deleted. When neither a table nor
// a plot was deleted a single "nothing to delete" message is logged instead.
//
// deleted_table_names / deleted_plot_names / deleted_page_names: arrays of
//     the names to report, one name per line.
// min_p_values: accepted but not used by this function.
// p_threshold: the threshold that is quoted in the messages.
function logDeletionSummary(deleted_table_names, deleted_plot_names,
                            deleted_page_names, min_p_values, p_threshold) {
    const nothing_deleted = (deleted_table_names.length + deleted_plot_names.length) === 0;
    if (nothing_deleted) {
        log("No tables or plots met the condition to be deleted.");
        return;
    }

    // Writes a heading followed by the names, one per line
    const logNames = function (heading, names) {
        log(heading);
        log(names.join("\r\n"));
    };

    if (deleted_table_names.length > 0) {
        logNames('The following tables were not significant at the ' + p_threshold +
                 ' level, and have been deleted:', deleted_table_names);
        log('\r\n');
    }

    if (deleted_plot_names.length > 0) {
        logNames('The following plots were not significant at the ' + p_threshold + ' level, and have been deleted:',
                 deleted_plot_names);
        log('\r\n');
    }

    if (deleted_page_names.length > 0)
        logNames('The following pages were made empty, and have been deleted:', deleted_page_names);
    return;
}

// Sorts pages by name, ignoring case. The pages are taken from the user
// selection: the pages inside a single selected folder, or otherwise the
// selected pages themselves, which must all be in one folder.
// The alphabetically first page keeps its position and the remaining pages
// are moved, in order, directly behind it.
//
// Returns false (after logging why) when the selection cannot be sorted,
// otherwise undefined.
function sortPagesAlphabetically() {
    includeWeb("QScript Functions to Generate Outputs");
    includeWeb('QScript Selection Functions');
    includeWeb('QScript Table Functions');
    includeWeb('QScript Utility Functions');
    const cannot_sort = "Cannot sort the selection. Select two or more pages in the same folder or at the base level of Pages, or select a single folder.";

    const selections = getAllUserSelections();
    const same_folder = selectedPagesAreInSameFolder(selections);
    const single_folder = exactlyNSelected(selections, "Top Pages", 1);

    // Reject an empty selection, and one that cannot be sorted unambiguously
    const nothing_selected = selections.selected_top_level_pages.length == 0;
    if (nothing_selected || !(same_folder || single_folder)) {
        log(cannot_sort);
        return false;
    }

    // Work out which pages to sort and the group that contains them
    let container;
    let pages;
    if (single_folder) {
        // Sort the pages within the selected folder
        container = selections.selected_top_level_pages[0];
        pages = container.subItems.filter(item => item.type == "ReportGroup");
        if (pages.length < 1) {
            log(cannot_sort);
            return false;
        }
    } else {
        container = selections.selected_top_level_pages[0].parentGroup();
        pages = selections.selected_top_level_pages;
    }

    pages.sort(function (a, b) {
        const left = a.name.toLowerCase();
        const right = b.name.toLowerCase();
        return left < right ? -1 : (left > right ? 1 : 0);
    });

    // Chain the pages one after another, starting behind the first page
    for (let i = 1; i < pages.length; i++)
        container.moveAfter(pages[i], pages[i - 1]);
}

// Sorts the selected variable sets, or the variables of a single selected
// variable set, alphabetically (ignoring case) within their data set.
//
// by_variable_name: when true the variable names are compared (for a
//     variable set, the name of its first variable); otherwise the labels
//     (for a variable set, its name).
//
// Returns false (after logging why) when the selection cannot be sorted,
// otherwise undefined.
function sortVariablesAlphabetically(by_variable_name = false) {
    includeWeb("QScript Selection Functions");

    // The text by which a variable set or a variable is ordered
    function getSortableName(x, by_variable_name) {
        if (x.type == "Question")
            return by_variable_name ? x.variables[0].name : x.name;
        else
            return by_variable_name ? x.name : x.label;
    }

    // Case-insensitive comparison of two variable sets or two variables
    function compareBySortableName(a, b) {
        const a_name = getSortableName(a, by_variable_name).toLowerCase();
        const b_name = getSortableName(b, by_variable_name).toLowerCase();
        if (a_name < b_name) { return -1; }
        if (a_name > b_name) { return 1; }
        return 0;
    }

    const fail_message = "Cannot sort. Select two or more individual variables, or at least one variable set, under Data Sets.";

    const user_selections = getAllUserSelections();

    if (!NOrMoreSelected(user_selections, "Variable", 2)) {
        log(fail_message);
        return false;
    }

    if (!exactlyNSelected(user_selections, "Data Set", 1)) {
        log("Cannot sort. Select variables from one data set only.");
        return false;
    }
    const data_file = user_selections.selected_data_sets[0];

    // Sort variable sets First
    // Only if more than one is selected
    if (NOrMoreSelected(user_selections, "Question", 2)) {
        const selected_questions = user_selections.selected_questions;

        // Move variables together: everything is gathered directly behind
        // the variable set that comes first in the data file.
        sortQuestionsInDataFileOrder(selected_questions);
        const first_question_variables = selected_questions[0].variables;
        const gathering_anchor = first_question_variables[first_question_variables.length - 1];
        let remaining_variables = [];
        selected_questions.slice(1).forEach(function (q) {
            remaining_variables = remaining_variables.concat(q.variables);
        });
        data_file.moveAfter(remaining_variables, gathering_anchor);

        // Sort questions by name, then chain each variable set behind the
        // last variable of the one before it.
        selected_questions.sort(compareBySortableName);
        let last_variable = selected_questions[0].variables[selected_questions[0].variables.length - 1];
        selected_questions.forEach(function (q, index) {
            if (index > 0) {
                data_file.moveAfter(q.variables, last_variable);
                last_variable = q.variables[q.variables.length - 1];
            }
        });

    } else if (exactlyNSelected(user_selections, "Question", 1)) {
        // A single variable set is selected: sort the variables within it.
        // NOTE(review): all variables of the set are sorted, also when only
        // some of them are selected - confirm this is intended.
        const question = user_selections.selected_questions[0];
        const selected_variables = question.variables;

        const data_set_guids = question.dataFile.questions.map(q => q.guid);
        const question_index_in_file = data_set_guids.indexOf(question.guid);
        // The sorted variables go behind the last variable of the preceding
        // variable set. The first variable set in the file has no
        // predecessor, so null is passed as the anchor in that case
        // (presumably "move to the start" - verify against the QScript
        // moveAfter documentation).
        let last_var = null;
        if (question_index_in_file > 0) {
            const previous_question = question.dataFile.questions[question_index_in_file - 1];
            last_var = previous_question.variables[previous_question.variables.length - 1];
        }

        // Sort the variables alphabetically
        selected_variables.sort(compareBySortableName);

        question.dataFile.moveAfter(selected_variables, last_var);

    }
}

// Gathers, from the user selection, the items to check when deleting by a
// statistic (significance, sample size).
//
// Returns false (after logging why) when no table, plot, word cloud or
// R output is selected. Otherwise returns an object with:
//   selected_items: the selected and implicitly selected tables, plots and
//       word clouds, followed by the implicitly selected R plots;
//   not_evaluated_r_plots: R plots that have no nested table;
//   not_evaluated_r_items: R outputs that are not plots.
function organizeSelectedItemsForStatisticBasedDeletion(user_selections) {
    // Check selections
    const checkable_types = ["Table", "Plot", "WordCloud", "R Output"];
    const something_selected = checkable_types.some(function (type) {
        return oneOrMoreSelected(user_selections, type, true);
    });
    if (!something_selected) {
        log("There are no tables or charts selected. Select one or more " +
             "pages which contain tables or charts.");
        return false;
    }

    const tables_plots_word_clouds = [].concat(
        user_selections.selected_tables,
        user_selections.selected_plots,
        user_selections.selected_word_clouds,
        user_selections.implicitly_selected_tables,
        user_selections.implicitly_selected_plots,
        user_selections.implicitly_selected_word_clouds);

    // Special handling for R outputs.
    // Significance can currently only be checked for R plots that have
    // nested tables (i.e. have been converted from a table). Other R items
    // are never deleted; they are reported back so that the user can be told
    // they were not checked.
    const r_outputs = user_selections.implicitly_selected_r_outputs;

    // First split: plots versus other R items
    const plots_vs_other = splitArrayIntoApplicableAndNotApplicable(r_outputs, rItemIsPlot);
    // Second split: plots with a nested table versus plots without one
    const nested_vs_plain = splitArrayIntoApplicableAndNotApplicable(plots_vs_other.applicable, rPlotHasNestedTable);

    return {
        // All R plots are passed on here, with or without a nested table
        selected_items: tables_plots_word_clouds.concat(plots_vs_other.applicable),
        not_evaluated_r_plots: nested_vs_plain.notApplicable,
        not_evaluated_r_items: plots_vs_other.notApplicable
    };
}

// Asks the user for a minimum sample size and deletes the tables and
// visualizations for which maxBaseBelowThreshhold reports a smaller sample,
// then moves the selection to something that still exists.
// In Displayr the items come from the user selection and pages that end up
// empty are deleted too; otherwise the user picks the tables to check and a
// summary is logged.
//
// Returns false when there is nothing to check, otherwise undefined.
function deleteWithSmallSample() {
    const min_size = prompt("Please enter the minimum sample size that you wish to view. Tables and visualizations with a smaller sample size will be removed.", 10);
    const web_mode = inDisplayr();

    // Work out which items to check
    let candidates;
    let skipped_r_plots = [];
    let skipped_r_items = [];
    if (!web_mode) {
        candidates = selectManyTablesWithGroupNames("Please choose which tables you would like to check. Tables with a sample size less than " +min_size+" will be removed." , project.report).tables;
        if (candidates.length < 1) {
            log("There are no tables or charts selected. Select one or more tables or charts.");
            return false;
        }
    } else {
        const organized = organizeSelectedItemsForStatisticBasedDeletion(getAllUserSelections());
        if (!organized)
            return false;
        skipped_r_plots = organized.not_evaluated_r_plots;
        skipped_r_items = organized.not_evaluated_r_items;
        candidates = organized.selected_items.filter(item => item.group != null);
    }

    // Split the candidates into those to delete and those to keep
    const to_delete = [];
    const to_keep = [];
    for (let i = 0; i < candidates.length; i++) {
        const bucket = maxBaseBelowThreshhold(candidates[i], min_size) ? to_delete : to_keep;
        bucket.push(candidates[i]);
    }

    // In Q, just delete the selected items. On the web the pages which are
    // made blank by the removal of tables/plots need to be cleared out as
    // well. This is mainly to facilitate the Insert > Report feature being
    // cleanable.
    const affected_pages = to_delete.map(item => item.group);
    const fallback_focus = identifyPreviousPageToDeletions(affected_pages);

    const removed_table_names = [];
    const removed_plot_names = [];
    to_delete.forEach(function (item) {
        try {
            const names = item.type === "Table" ? removed_table_names : removed_plot_names;
            names.push(item.name);
            item.deleteItem();
        } catch (e) {}
    });

    if (web_mode)
        deleteEmptyPages(affected_pages);

    // If anything was deleted, choose where to move the focus: a surviving
    // item (its page on the web) or else the page before the deletions
    if (to_delete.length > 0) {
        let focus = fallback_focus;
        if (to_keep.length > 0)
            focus = web_mode ? to_keep[0].group : to_keep[0];
        project.report.setSelectedRaw([focus]);
    }

    if (!web_mode) {
        if (removed_table_names.length > 0) {
            log("The following tables have been removed: ");
            log(removed_table_names.join('\r\n'));
        } else {
            log("No tables were found to have a sample size less than " + min_size +".");
        }
    }

    if (skipped_r_plots.length > 0)
        log("Sample size could not be identified for some visualizations and these have not been deleted.");

    if (skipped_r_items.length > 0)
        log(correctTerminology("Sample size was not checked for Calculations and these have not been deleted."));

    return;
}
 

// Identify whether the largest sample size for an item
// is below the given min size.
// For tables, check the Base n.
// For plots, convert to a temporary table and check the Base n; the
// temporary table is always deleted again.
// For visualizations with nested tables, check the Base n
// of the nested table.
// For other R items, and for any other type of item, do nothing.
//
// item: the report item to check.
// min_size: the minimum sample size.
//
// Returns true when the item should be deleted (sample too small, or the
// item is empty / cannot be calculated), otherwise false.
function maxBaseBelowThreshhold(item, min_size) {

    // Whether the largest 'Base n' of a calculated output is below min_size
    function largestBaseIsTooSmall(output) {
        return maxWithReplacedNaN(output.get('Base n'), 0) < min_size;
    }

    if (item.type ==  'Table') {
        let output;
        try {
            output = item.calculateOutput();
        } catch (e) {
            return true; // Table is empty and should be removed
        }
        if (output.availableStatistics.indexOf('Text') > -1)
            return false; // Table is text and does not have concept of missing data
        return largestBaseIsTooSmall(output);
    }

    if (item.type == 'Plot') {

        if (item.primary == null)
            return true; // empty, delete

        if (item.tertiary != null)
            return false; //can't check sample size on this, don't delete

        const temp_table = project.report.appendTable();
        try {
            temp_table.primary = item.primary;
            if (item.secondary != null)
                temp_table.secondary = item.secondary;

            if (item.weight != null)
                temp_table.weight = item.weight;
            if (item.filters != null)
                temp_table.filters = item.filters;

            try {
                return largestBaseIsTooSmall(temp_table.calculateOutput());
            } catch (e) {
                return true; // plot is empty and should be removed
            }
        } finally {
            // Remove the temporary table on every exit path, also when
            // copying the plot's settings fails.
            temp_table.deleteItem();
        }
    }

    if (item.type == "R Output") {
        if (!rItemIsPlot(item))
            return false; // not a plot, don't delete

        if (!rPlotHasNestedTable(item))
            return false; //  not nested table, don't delete

        const nested_table = item.subItems.filter(x => x.type == "Table")[0];
        try {
            return largestBaseIsTooSmall(nested_table.calculateOutput());
        } catch (e) {
            return true; // Table is empty and should be removed
        }
    }

    // Any other kind of item is not checked and never deleted
    return false;
}

// Given a collection of pages to be deleted, work out what page
// is previous to those pages to which the focus can be set at the
// completion of the QScript.
//
// Only the first page of the collection is looked at. The result is the page
// (report group) that directly precedes it among the pages of its parent
// group, or the parent group itself when it is the first page there (or
// cannot be found there). Returns null for an empty collection.
function identifyPreviousPageToDeletions(deleted_pages) {
    if (deleted_pages.length === 0)
        return null;

    const first_page = deleted_pages[0];
    const parent_of_first = first_page.group;
    // NOTE(review): a page without a parent group is presumably the root of
    // the report, which cannot be deleted, so it is returned as the focus
    // target - confirm.
    if (parent_of_first == null)
        return first_page;

    // The position is looked up among the sibling pages only, so the
    // preceding page has to be taken from that same list rather than from
    // the unfiltered sub items of the parent.
    const sibling_pages = parent_of_first.subItems.filter(x => x.type == "ReportGroup");
    const index_of_first_within_parent = sibling_pages.map(y => y.guid).indexOf(first_page.guid);
    if (index_of_first_within_parent <= 0)
        return parent_of_first;
    return sibling_pages[index_of_first_within_parent - 1];
}