Data - Countries from IP Address(es) (Geocoding)

From Q
Jump to navigation Jump to search

This QScript creates a new country variable from an IP address variable. It handles both IPv4 and IPv6 addresses. Note that geocoding is not an exact science, so the accuracy of the results is not guaranteed.

Acknowledgements

This QScript uses the MaxMind database in the rgeolocate package.

How to apply this QScript

  • Start typing the name of the QScript into the Search features and data box in the top right of the Q window.
  • Click on the QScript when it appears in the QScripts and Rules section of the search results.

OR

  • Select Automate > Browse Online Library.
  • Select this QScript from the list.

Customizing the QScript

This QScript is written in JavaScript and can be customized by copying and modifying the JavaScript.

Customizing QScripts in Q4.11 and more recent versions

  • Start typing the name of the QScript into the Search features and data box in the top right of the Q window.
  • Hover your mouse over the QScript when it appears in the QScripts and Rules section of the search results.
  • Press Edit a Copy (bottom-left corner of the preview).
  • Modify the JavaScript (see QScripts for more detail on this).
  • Either:
    • Run the QScript, by pressing the blue triangle button.
    • Save the QScript and run it at a later time, using Automate > Run QScript (Macro) from File.

Customizing QScripts in older versions

  • Copy the JavaScript shown on this page.
  • Create a new text file, giving it a file extension of .QScript. See here for more information about how to do this.
  • Modify the JavaScript (see QScripts for more detail on this).
  • Run the file using Automate > Run QScript (Macro) from File.

JavaScript

includeWeb('QScript Selection Functions');
includeWeb('QScript Utility Functions');

/**
 * Tests whether a variable can be offered as an IP address variable.
 *
 * @param variable - Object whose `variableType` property is inspected.
 * @returns {boolean} true when `variableType` is 'Text' or 'Categorical',
 *     false otherwise.
 */
function isVariableTextCat(variable) {
    var kind = variable.variableType;
    if (kind == 'Text') {
        return true;
    }
    return kind == 'Categorical';
}

/**
 * Builds the R code that geocodes an IP address variable.
 *
 * The generated code takes the third column of flipAPI::GeocodeIPs(), converts
 * it to a factor, and raises a user error when every value is NA.
 *
 * @param {string} variable_name - Name of the IP variable as referenced in R.
 * @returns {string} The R code for the new variable.
 */
function buildIPCountryRCode(variable_name) {
    return "result <- as.factor(flipAPI::GeocodeIPs(ips = " + variable_name + ")[, 3])\n" +
        "if (all(is.na(result)))\n" +
        "    StopForUserError('No cases of the IP variable can be geocoded.')\n" +
        "result";
}

/**
 * Collects everything needed to create the country variable for one IP variable.
 *
 * Nothing is created here, so callers keep control over which call is wrapped
 * in a try/catch.
 *
 * @param ip_variable - Variable holding the IP addresses.
 * @returns {{data_file: *, code: string, name: string, label: string}} The
 *     owning data file, the R code, and the de-duplicated name and label.
 */
function describeIPCountryVariable(ip_variable) {
    const data_file = ip_variable.question.dataFile;
    return {
        data_file: data_file,
        code: buildIPCountryRCode(ip_variable.name),
        name: preventDuplicateVariableName(data_file, "IP.country"),
        label: preventDuplicateQuestionName(data_file, "IP.country")
    };
}

/**
 * Displayr flow: creates one country variable per selected variable and logs
 * a summary of how many were created and which selections were skipped.
 */
function addIPCountriesInDisplayr() {
    const selected_variables = project.report.selectedVariables();
    if (selected_variables.length === 0) {
        log("At least one variable containing IP addresses needs to be selected from 'Data Sources'.");
        return;
    }

    const created_variables = [];
    const error_variable_labels = [];
    for (const candidate of selected_variables) {
        if (!isVariableTextCat(candidate)) {
            error_variable_labels.push(candidate.label);
            continue;
        }
        const spec = describeIPCountryVariable(candidate);
        try {
            // NOTE(review): the last argument is presumably the variable the
            // new one is positioned after - confirm against the QScript API.
            const new_variable = spec.data_file.newRVariable(spec.code, spec.name, spec.label, candidate);
            insertAtHoverButtonIfShown(new_variable.question);
            created_variables.push(new_variable);
        }
        catch (e) {
            // Best effort: a failure for one variable must not stop the rest,
            // so it is only reported in the summary message.
            error_variable_labels.push(candidate.label);
        }
    }

    if (created_variables.length > 0) {
        project.report.setSelectedRaw(created_variables);
    }
    let message = created_variables.length + " variable(s) has been added to the dataset. ";
    if (error_variable_labels.length > 0) {
        message += "However, the following variable(s) could not be processed: " + error_variable_labels.join(", ") + ".";
    }
    log(message);
}

/**
 * Q flow: prompts for one Text or Categorical variable across all data files
 * and creates the country variable for it.
 *
 * @param data_files - The project's data files.
 */
function addIPCountryInQ(data_files) {
    let candidate_variables = [];
    data_files.forEach(function (data_file) {
        candidate_variables = candidate_variables.concat(data_file.variables.filter(isVariableTextCat));
    });

    // Without any candidate there is nothing to choose from, so report that
    // instead of opening a selection prompt with an empty list.
    if (candidate_variables.length === 0) {
        log("There needs to be a Text or Categorical variable containing IP addresses in your project");
        return;
    }

    const ip_variable = selectOneVariableByQuestionNameAndLabel('Select the IP address variable:', candidate_variables, false);
    const spec = describeIPCountryVariable(ip_variable);

    let new_variable;
    try {
        new_variable = spec.data_file.newRVariable(spec.code, spec.name, spec.label, ip_variable);
    }
    catch (e) {
        // Any failure to create the variable is reported with this message.
        log("No cases of the IP variable can be geocoded.");
        return;
    }
    insertAtHoverButtonIfShown(new_variable.question);
    project.report.setSelectedRaw([new_variable]);
    log("A variable containing the countries of the IP addresses has been added to the data set.");
}

/**
 * Entry point: adds country variable(s) derived from IP address variable(s).
 *
 * Logs a message and stops when the project has no data files. When
 * Q.isOnTheWeb exists and returns a truthy value (treated as running in
 * Displayr) the currently selected variables are processed; otherwise the
 * user is prompted to pick a single variable.
 */
function main() {
    const data_files = project.dataFiles;

    if (data_files.length === 0) {
        log("There needs to be a data set containing an IP address variable in your project");
        return;
    }

    const is_displayr = (!!Q.isOnTheWeb && Q.isOnTheWeb());
    if (is_displayr) {
        addIPCountriesInDisplayr();
        return;
    }
    addIPCountryInQ(data_files);
}

// Run the script.
main();

See also