Calculate Grouped Medians

From Q
Jump to: navigation, search

Q calculates Median values in the standard way, based on the values in the raw data. It is sometimes desirable to calculate the median differently for questions whose categories contain ranges of values. Common examples include questions describing income brackets or age ranges. In these cases it is possible to use the values in the ranges to calculate a grouped median. One such calculation is described here. The calculation is designed to work out the relative position within the bracket where the median lies, based on the proportion of the sample in that bracket. This is a different estimate of the median than that provided by assigning mid-point values to each category, which would simply provide the mid-point value for the category in which the median of the data lies (see Create New Variables - Midpoint Coding and Quantification).

You can use the code on this page to replace the standard median shown in the Statistics - Below with a median computed on the basis of the ranges in your data. To use it:

  1. Create a table with the desired categories shown in the rows. It is best not to include any additional NETs in the table - the rows should describe only those categories which represent unique brackets from which you want to compute medians.
  2. Select Automate > Custom Rule.
  3. Paste in the code from below.
  4. Click the 'play' icon and Close.
  5. Enter the Lower value and Upper value for each category.
  6. Click OK.

Here is an example of the values entered, and the result on the table.


Note that it is only appropriate to use this calculation when the rows of the table describe mutually-exclusive categories. The values entered, and the resulting calculation, are not used in any statistical testing.

form.setSummary("Grouped medians");
var below_table_exists = belowTableExists();

if (!below_table_exists)
    form.ruleNotApplicable("Statistics - Below are not available on this table");
if (below_table.availableStatistics.indexOf("Median") == -1)
    form.ruleNotApplicable("the Median is not available on this table");
if (table.availableStatistics.indexOf("%") == -1 && table.availableStatistics.indexOf("Column %") == -1)
    form.ruleNotApplicable("there is no appropriate percentage to use");

var stat_to_use = (table.availableStatistics.indexOf("Column %") != -1) ? "Column %" : "%";
var values = table.get(stat_to_use);
var medians = below_table.get("Median");

var row_labels = table.rowLabels
var controls = [];
var upper_vals = [];
var lower_vals = [];

var net_rows = table.netRows;
var rows_to_include = [];

var r = 0;
for (var row = 0; row < table.numberRows; row++) {
    if (net_rows.indexOf(row) == -1) {
        var new_row_label = form.newLabel(row_labels[row] + ":");
        var new_lower_label = form.newLabel("Lower value");
        var new_lower = form.newNumberBox("nl"+r);
        var new_upper_label = form.newLabel("Upper value");
        var new_upper = form.newNumberBox("nr"+r);
        new_upper.lineBreakAfter = true;
        controls.push(new_row_label, new_lower_label, new_lower, new_upper_label, new_upper);

for (var col = 0; col < table.numberColumns; col++) {
    var cur_median = NaN;
    var cumulative = [];
    for (var r = 0; r < rows_to_include.length; r++) {
        var row = rows_to_include[r];
        var new_cumulative = (r == 0) ? values[row][col] : cumulative[r-1] + values[row][col];
        if (new_cumulative > 50) {
            var diff = 50 - (r > 0 ? cumulative[r-1] : 0);
            var range = upper_vals[r] - lower_vals[r];
            var percentage = values[row][col];
            cur_median = lower_vals[r] + (diff / percentage) * range;
            medians[0][col] = cur_median;


below_table.set("Median", medians)

// Returns true if Statistics - Below are available for this table.
function belowTableExists() {
    var exists = true;
    try { 
    } catch (e) {
        exists = false;
    return exists;