JavaScript Text Analysis Functions

From Q
Jump to: navigation, search

This page contains a list of functions that are designed to be used for interrogating text in variable labels, value labels, and the responses to Text questions. See Manipulating Text with JavaScript for functions for manipulating text in data.

To make these functions available when writing a QScript or Rule see JavaScript Reference.

containsAnyStrings(x, search_terms)

Searches through a string and returns true if any of the contents of the array called search_terms are contained within the string x.

containsNumber(x)

This function returns true if the input string contains any digits.

containsSubstring(label, possible_substrings)

This function searches the input label for the strings contained in the array possible_substrings, automatically ignoring the cases of the strings. It returns true if any of the substrings are found.

isDontKnow(label)

Returns true if a string seems to represent a don't know response. It automatically ignores distinctions in case.

isOther(label)

Returns true if the input label starts with "other", or contains "other" immediately after a whitespace character (so "Another" does not match). It automatically ignores the case of the input label.

isNoneOfThese(label)

This function returns true if the input label contains the words none or nothing, automatically ignoring the case of label.

isNone(label)

Returns true if a string contains the word none. It automatically ignores distinctions in case.

isAllOfThese(label)

This function returns true if the input label contains any of the strings all of these, any of these, all of them, or any of them, or if the label is exactly any or all, ignoring the case of label.

isRange(label)

This function returns true if the input label contains a hyphen or the substring ' to '. This kind of string commonly indicates that the label is referring to a range of values. For example, a question asking for the respondents' ages may give options like 18 to 29, 30 to 39, etc.

computeMidpoint(x)

This function computes the midpoints of an array of numbers, assuming they are ordered. For example:

  • Converts '[10]' into '10'.
  • Converts '[10,12]' to '11'.
  • Converts '[1,3, 10]' to '3'.
  • Converts '[1,3,4,10]' to '3.5'.

See Create New Variables - Midpoint Coding and Quantification for an example that uses this function.

quantifyArray(x)

This function interprets each string in an array as a number. For example:

  • Converts 'None' into 0.
  • Converts 'three' to 3.
  • Converts 'Up to £20,000' to '20000'.

This function requires that you also use the function quantify.

quantify(label)

This function interprets a string returning a number. For example:

  • Converts 'None' into 0.
  • Converts 'three' to 3.
  • Converts 'Up to £20,000' to '20000'.

This function requires that you also use the functions asNumber and isNumber.

labelRefersToTime(label)

This function returns true if the input label contains English words or abbreviations that refer to time periods. This includes day, week, month, quarter, year, second, minute, hour, mths, hrs, and wks.

labelsReferToTime(label_array, max_number)

This function returns true if more than max_number of the labels in label_array refer to time.

getBottomAnchoredReplacementValueForLabel(label)

This function is used when sorting tables in descending order and certain labels are to be kept at the bottom of the table. This function generates a large negative number for such labels to ensure that they are kept at the bottom during the sort (or 'anchored').

The values are:

  • Other/Specify categories: -999999999995
  • None of these categories: -999999999996
  • All of these categories: -999999999997
  • Don't Know categories: -999999999998
  • NET and SUM: -999999999999

openEndedTopLabel(label)

This function returns true if the input label refers to a numeric quantity, but is in some sense open-ended from above. This includes labels like 55 or more, greater than 95%, and 8+. Although these categories refer to numeric quantities, the open-endedness makes them difficult to quantify in the same way as other numeric labels.

Source Code

/**
 * Searches through a string and returns true if any of the contents of the array called
 * search_terms are contained within the string x. Matching is case-sensitive.
 *
 * @param {string} x - The string to search in.
 * @param {string[]} search_terms - The candidate substrings to look for.
 * @returns {boolean} true if at least one search term occurs in x; false otherwise,
 *     including when search_terms is empty.
 */
function containsAnyStrings(x, search_terms) {
    for (var i = 0; i < search_terms.length; i++) {
        // Stop at the first hit; the remaining terms cannot change the answer.
        if (x.indexOf(search_terms[i]) !== -1)
            return true;
    }
    return false;
}
 
// Reports whether x contains at least one digit character.
// Returns true when a digit is found anywhere in x, otherwise false.
function containsNumber(x) {
    var digit_pattern = /\d/;
    var has_digit = digit_pattern.test(x);
    return has_digit;
}
 
 
// Case-insensitive substring search.
// Returns true if label contains at least one of the strings in the array
// possible_substrings (both sides are lower-cased before comparing), and
// false otherwise, including when the array is empty.
function containsSubstring(label, possible_substrings) {
    var haystack = label.toLowerCase();

    function occursInLabel(candidate) {
        var needle = candidate.toLowerCase();
        return haystack.indexOf(needle) >= 0;
    }

    return possible_substrings.some(occursInLabel);
}
 
 
 
// Lower-case phrases, written without punctuation, that isDontKnow looks for
// inside a label when deciding whether it is a "don't know" style response.
var _DK_STRINGS_GLOBAL = [
    "dont know",
    "don t know",
    "unsure",
    "not sure",
    "do not know",
    "no idea",
    "not applicable"
];

/**
 * Checks whether a label represents a "don't know" style response.
 *
 * Punctuation is stripped and the label is lower-cased before matching, so
 * "Don't know" is compared as "dont know" and "N/A" as "na". The label matches
 * if it contains any phrase in _DK_STRINGS_GLOBAL, or if the whole normalised
 * label is exactly "na" or "dk".
 *
 * @param {string} label - The label to inspect.
 * @returns {boolean} true if the label looks like a don't know response.
 */
function isDontKnow(label) {
    // Drop every character that is neither a word character nor whitespace.
    var normalised = label.replace(/[^\w\s]/gi, '').toLowerCase();
    if (containsSubstring(normalised, _DK_STRINGS_GLOBAL))
        return true;
    // The label is already lower-case here, so only the lower-case
    // abbreviations need to be compared.
    return normalised === "na" || normalised === "dk";
}
 
// Checks whether a label looks like an "other" category.
// After lower-casing, the label matches when it starts with "other" or when
// "other" appears directly after a whitespace character. "Another" is
// therefore not a match, and neither is "(other)", where the word follows a
// bracket rather than whitespace.
function isOther(label) {
    var lowered = label.toLowerCase();
    var match_position = lowered.search(/\sother|^other/);
    return match_position !== -1;
}
 
// Checks whether a label represents a 'none of these' type option.
// Returns true if the lower-cased label contains "none" or "nothing".
function isNoneOfThese(label) {
    var none_markers = ["none","nothing"];
    var lowered = label.toLowerCase();
    return containsSubstring(lowered, none_markers);
}
 
/**
 * Returns true if a label contains the string "none", ignoring case.
 *
 * This is a substring match, so a label such as "Nonexistent" also matches.
 *
 * @param {string} label - The label to inspect.
 * @returns {boolean} true if "none" occurs anywhere in the lower-cased label.
 */
function isNone(label) {
    // A direct search avoids iterating an array with for...in, which would also
    // visit any enumerable properties added to Array.prototype.
    return label.toLowerCase().indexOf("none") !== -1;
}
 
// Checks whether a label represents an 'all of these' type option.
// Returns true if the lower-cased label contains "all of these",
// "any of these", "all of them" or "any of them", or if the whole
// lower-cased label is exactly "any" or "all".
function isAllOfThese(label) {
    var lowered = label.toLowerCase();
    var all_phrases = ["all of these","any of these","all of them","any of them"];
    return containsSubstring(lowered, all_phrases) || lowered === "any" || lowered === "all";
}
 
// Checks whether a label looks like a range of values.
// Returns true if the label contains a hyphen or the substring ' to '
// (case-sensitive, with a space on each side), otherwise false.
function isRange(label) {
    var range_markers = ['-', ' to '];
    for (var i = 0; i < range_markers.length; i++) {
        if (label.indexOf(range_markers[i]) >= 0)
            return true;
    }
    return false;
}
 
// Computes the midpoint of an array x of numbers that is assumed to be ordered.
//   - empty array: NaN
//   - one element: that element
//   - two elements: their average, unless isNumber(x[0]) is false, in which
//     case x[1] is returned
//   - three or more: the middle element (odd length) or the average of the two
//     middle elements (even length)
// isNumber is defined outside this block; presumably it tests whether a value
// is numeric - confirm against its definition.
function computeMidpoint(x) {
    var count = x.length;

    if (count === 0)
        return NaN;

    if (count === 1)
        return x[0];

    if (count === 2) {
        // The first element may come from text such as 'up to' that was
        // misread as a delimiter; in that case only the second element is usable.
        return isNumber(x[0]) ? (x[0] + x[1]) / 2.0 : x[1];
    }

    var middle = Math.floor(count / 2);
    return (count % 2) ? x[middle] : (x[middle - 1] + x[middle]) / 2.0;
}
 
// Interprets each string in the array x as a number by applying quantify to
// every element, and returns the results as a new array in the same order.
function quantifyArray(x){
    var quantified = x.map(function (label, index, labels) {
        return quantify(label, index, labels);
    });
    return quantified;
}
 
/**
 * Interprets a label as a number.
 *
 * The lower-cased label is first passed to asNumber; if isNumber accepts the
 * result, that result is returned. (asNumber and isNumber are defined outside
 * this block; presumably they parse and validate numeric text - confirm
 * against their definitions.) Otherwise the label is searched for English
 * number words and the value of the first matching entry is returned.
 *
 * Matching is by substring and the table order is significant:
 *   - words meaning zero are checked first, so 'none' is not read as 'one';
 *   - 'someone' yields NaN so that it is not read as 'one';
 *   - 'one hundred' / 'one-hundred' are checked before 'one', otherwise they
 *     could never be reached and such labels would be quantified as 1.
 * Because of the substring matching, compound words are not understood:
 * for example 'fourteen' matches 'four'.
 *
 * @param {string} label - The label to interpret.
 * @returns {number} The interpreted value, or NaN if nothing matched.
 */
function quantify(label) {
    var lowered = label.toLowerCase();
    var parsed = asNumber(lowered);
    if (isNumber(parsed))
        return parsed;

    var word_values = [
        ['none', 0],
        ['nothing', 0],
        ['never', 0],
        ['zero', 0],
        ['someone', NaN], // to avoid a misread of the next values
        ['one hundred', 100],
        ['one-hundred', 100],
        ['one', 1],
        ['two', 2],
        ['three', 3],
        ['four', 4],
        ['five', 5],
        ['six', 6],
        ['seven', 7],
        ['eight', 8],
        ['nine', 9],
        ['ten', 10],
        ['eleven', 11],
        ['twenty', 20],
        ['fifty', 50]
    ];

    for (var i = 0; i < word_values.length; i++) {
        if (lowered.indexOf(word_values[i][0]) !== -1)
            return word_values[i][1];
    }
    return NaN;
}
 
// Returns true if the label contains an English word or abbreviation for a
// time period. Matching is a case-insensitive substring search; the
// abbreviations " mths", " hrs" and " wks" must be preceded by a space.
function labelRefersToTime(label) {
    var time_terms = ["day", "week", "month", "quarter" ,"year", "second", "minute", "hour", " mths", " hrs", " wks"];
    var lowered = label.toLowerCase();
    return containsSubstring(lowered, time_terms);
}
 
// Returns true if strictly more than max_number of the labels in label_array
// refer to time, as judged by labelRefersToTime.
function labelsReferToTime(label_array, max_number) {
    var time_labels = label_array.filter(function (label, index, labels) {
        return labelRefersToTime(label, index, labels);
    });
    return time_labels.length > max_number;
}
// Returns a large negative number for labels that should stay at the bottom
// of a table sorted in descending order, or null for any other label.
// The checks run in this order and the first match wins:
//   'Other'            -> -999999999995
//   'None of these'    -> -999999999996
//   'All of these'     -> -999999999997
//   'Dont Know'        -> -999999999998
//   "NET" or "SUM"     -> -999999999999 (exact, case-sensitive match)
function getBottomAnchoredReplacementValueForLabel(label) {
    if (isOther(label))
        return -999999999995;
    if (isNoneOfThese(label))
        return -999999999996;
    if (isAllOfThese(label))
        return -999999999997;
    if (isDontKnow(label))
        return -999999999998;

    var is_total = (label == "NET" || label == "SUM");
    return is_total ? -999999999999 : null;
}
 
// Returns true if the label can be quantified (quantify does not give NaN)
// and also contains a string indicating that the quantity is open-ended
// from above.
//
// For example "more than 5", "5 or more", "5+"
function openEndedTopLabel(label) {
    var lowered = label.toLowerCase();

    // A label that cannot be quantified is never treated as open-ended.
    if (isNaN(quantify(lowered)))
        return false;

    var open_ended_markers = ["more", "at least", "greater", "older", "plus", "+", "over"];
    return containsSubstring(lowered, open_ended_markers);
}

See Also