Choice Modeling - Convert Alchemer (Survey Gizmo) Conjoint Data for Analysis

From Q
Jump to navigation Jump to search

Convert Alchemer (Survey Gizmo) Conjoint data to a format that can be used for conjoint/choice modeling in Displayr

This QScript makes it possible to analyze Alchemer (formerly Survey Gizmo) Conjoint data in Q or Displayr. Before running this script, Alchemer (formerly Survey Gizmo) Conjoint and respondent data files first need to be added to the project. This script produces questions containing the choices and the respondent version in the respondent data set. These inputs can then be used to run a Conjoint analysis using Latent Class Analysis, Multinomial Logit, or Hierarchical Bayes.

The user is prompted to select a Conjoint data set imported from Alchemer (formerly Survey Gizmo), and a respondent data set containing a question named Response ID.

To run this script, the Conjoint variable set must first be selected in the Data Tree. If there is more than one other data set containing a variable set Response ID then the respondent data set must also be selected.

How to apply this QScript

  • Start typing the name of the QScript into the Search features and data box in the top right of the Q window.
  • Click on the QScript when it appears in the QScripts and Rules section of the search results.

OR

  • Select Automate > Browse Online Library.
  • Select this QScript from the list.

Customizing the QScript

This QScript is written in JavaScript and can be customized by copying and modifying the JavaScript.

Customizing QScripts in Q4.11 and more recent versions

  • Start typing the name of the QScript into the Search features and data box in the top right of the Q window.
  • Hover your mouse over the QScript when it appears in the QScripts and Rules section of the search results.
  • Press Edit a Copy (bottom-left corner of the preview).
  • Modify the JavaScript (see QScripts for more detail on this).
  • Either:
    • Run the QScript, by pressing the blue triangle button.
    • Save the QScript and run it at a later time, using Automate > Run QScript (Macro) from File.

Customizing QScripts in older versions

  • Copy the JavaScript shown on this page.
  • Create a new text file, giving it a file extension of .QScript. See here for more information about how to do this.
  • Modify the JavaScript (see QScripts for more detail on this).
  • Run the file using Automate > Run QScript (Macro) from File.

JavaScript

▶ Show Code

includeWeb("QScript R Output Functions")

/**
 * Reports whether a data file contains a question whose name matches `name`.
 *
 * `name` is treated as literal text: regular-expression metacharacters in it
 * are escaped before matching, so names such as "Score (max)" match themselves.
 *
 * @param {Object} data_file - Object exposing a `questions` array whose
 *     elements have a `name` property.
 * @param {string} name - Question name to look for.
 * @param {boolean} [exact=false] - When true the whole question name must
 *     equal `name`; when false it only has to start with `name`.
 * @returns {boolean} true if at least one question matches, otherwise false.
 */
function dataFileHasQuestionWithName(data_file, name, exact = false)
{
    // Escape metacharacters so the name is matched literally, and build the
    // pattern once instead of on every loop iteration.
    var escaped_name = String(name).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    var patt = new RegExp("^" + escaped_name + (exact ? "$" : ""));

    var questions = data_file.questions;
    var n_questions = questions.length;
    for (var i = 0; i < n_questions; i++)
    {
        if (patt.test(questions[i].name))
            return true;
    }
    return false;
}


/**
 * Returns the first question in a data file whose name matches `name`.
 *
 * `name` is treated as literal text: regular-expression metacharacters in it
 * are escaped before matching, so a name such as "Score (max)" finds the
 * question with exactly that name rather than one called "Score max".
 *
 * @param {Object} data_file - Object exposing a `questions` array whose
 *     elements have a `name` property.
 * @param {string} name - Question name to look for.
 * @param {boolean} [exact=false] - When true the whole question name must
 *     equal `name`; when false it only has to start with `name`.
 * @returns {Object|null} The first matching question (in `questions` order),
 *     or null when none matches.
 */
function getQuestionFromDataFile(data_file, name, exact = false)
{
    // Escape metacharacters so the name is matched literally, and build the
    // pattern once instead of on every loop iteration.
    var escaped_name = String(name).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    var patt = new RegExp("^" + escaped_name + (exact ? "$" : ""));

    var questions = data_file.questions;
    var n_questions = questions.length;
    for (var i = 0; i < n_questions; i++)
    {
        if (patt.test(questions[i].name))
            return questions[i];
    }
    return null;
}

/**
 * Decides whether a data file looks like a respondent data set: it has a
 * question named exactly "Response ID" and no question whose name starts
 * with "Set Number" (which would mark it as a Conjoint data set instead).
 * A further requirement on "Date Submitted" is intentionally not enforced.
 *
 * @param {Object} data_file - Data file to inspect.
 * @returns {boolean} true when the file qualifies as a respondent data set.
 */
function isRespondentDataFile(data_file)
{
    var has_response_id = dataFileHasQuestionWithName(data_file, "Response ID", true);
    if (!has_response_id)
        return false;

    var has_set_number = dataFileHasQuestionWithName(data_file, "Set Number", false);
    return !has_set_number;
}

/**
 * Decides whether a data file looks like a Conjoint data set: it must have a
 * question named exactly "Response ID" plus questions whose names start with
 * "Set Number", "Card Number" and "Score".
 *
 * @param {Object} data_file - Data file to inspect.
 * @returns {boolean} true when every required question is present.
 */
function isConjointDataFile(data_file)
{
    // Each entry is [question name, exact match required]; checked in order
    // and abandoned at the first requirement that is not met.
    var required_questions = [
        ["Response ID", true],
        ["Set Number", false],
        ["Card Number", false],
        ["Score", false]
    ];
    return required_questions.every(function (requirement) {
        return dataFileHasQuestionWithName(data_file, requirement[0], requirement[1]);
    });
}

/**
 * Entry point of the QScript.
 *
 * Identifies a Conjoint data set and a respondent data set, then adds R
 * variables to the respondent data set: one "Choice" variable per conjoint
 * question (set number) and one "Version" variable.
 *
 * In Displayr (when Q.isOnTheWeb() is truthy) the data sets are taken from
 * the variables currently selected in the report; otherwise the user is
 * asked to pick them with selectOneDataFile.
 *
 * @returns {undefined|boolean} undefined on success or when a precondition
 *     fails (a message is logged), false when creating an R variable throws.
 */
function main()
{
    includeWeb('QScript Utility Functions');

    var data_files = project.dataFiles;

    if (data_files.length < 2)
    {
        log("There needs to be both a respondent data set and a choice data set loaded into your project");
        return;
    }

    var is_displayr = (!!Q.isOnTheWeb && Q.isOnTheWeb());

    // Declared once so that both branches below assign the same variables
    // explicitly rather than relying on var hoisting from the other branch.
    var conjoint_data_file = null;
    var respondent_data_file = null;

    if (is_displayr)
    {
        var variables = project.report.selectedVariables();
        if (variables.length == 0)
        {
            log("A Conjoint data set needs to be selected from 'Data Sets'.");
            return;
        }

        // Identify which data sets are selected (de-duplicated by name)
        var selected_data_file_names = new Set();
        var unique_data_files = [];
        var data_file;
        for (var i = 0; i < variables.length; i++)
        {
            data_file = variables[i].question.dataFile;
            if (!selected_data_file_names.has(data_file.name))
            {
                selected_data_file_names.add(data_file.name);
                unique_data_files.push(data_file);
            }
        }
        if (unique_data_files.length >= 3)
        {
            log("More than 2 data sets are selected. Select a Conjoint data set and a respondent data set.");
            return;
        }

        if (unique_data_files.length == 1)
        {
            conjoint_data_file = unique_data_files[0];
            if (!isConjointDataFile(conjoint_data_file))
            {
                log("A Conjoint data set needs to be selected from 'Data Sets'.");
                return;
            }
            // Iterate over all data sets in project to identify respondent data set;
            // it must be unambiguous, so a second candidate is an error.
            var respondent_data_file_index = -1;
            for (var i = 0; i < data_files.length; i++)
            {
                if (isRespondentDataFile(data_files[i]))
                {
                    if (respondent_data_file_index != -1)
                    {
                        log("Multiple respondent data sets have been identified. Please select both the particular respondent data set you wish to use as well as the conjoint data set from the Data Tree, and try again.");
                        return;
                    }
                    respondent_data_file_index = i;
                }
            }
            if (respondent_data_file_index > -1)
                respondent_data_file = data_files[respondent_data_file_index];
            else
            {
                log("No suitable respondent data set was found.");
                return;
            }
        }
        else // 2 data files selected
        {
            var file1 = unique_data_files[0];
            var file2 = unique_data_files[1];
            if (isConjointDataFile(file1))
            {
                conjoint_data_file = file1;
                respondent_data_file = file2;
            }
            else
            {
                if (!isConjointDataFile(file2))
                {
                    log("Please select a Conjoint data set from 'Data Sets'.");
                    return;
                }
                conjoint_data_file = file2;
                respondent_data_file = file1;
            }
            if (!isRespondentDataFile(respondent_data_file))
            {
                log("You have selected two data sets and a Conjoint data set has been identified. The other data set is required to contain a 'Response ID' but does not.");
                return;
            }
        }
    }
    else // In Q, ask user to select data sets
    {
        includeWeb('QScript Selection Functions');
        conjoint_data_file = selectOneDataFile('Select the Survey Gizmo Conjoint data set:', project.dataFiles);
        if (!isConjointDataFile(conjoint_data_file))
        {
            log("The selected data set is not a Conjoint data set.");
            return;
        }

        respondent_data_file = selectOneDataFile('Select the respondent data set:', project.dataFiles);
        if (!isRespondentDataFile(respondent_data_file))
        {
            log("The selected data set is not a respondent data set.");
            return;
        }
    }

    // Look up the Conjoint questions once; they are reused for every generated variable.
    var set_number_question = getQuestionFromDataFile(conjoint_data_file, "Set Number", false);
    var card_number_question = getQuestionFromDataFile(conjoint_data_file, "Card Number", false);
    var score_question = getQuestionFromDataFile(conjoint_data_file, "Score", false);

    // Get the number of alternatives per question.
    // One example Survey Gizmo file had invalid Card Number values for what seem to be
    // respondents that had missed certain questions, so instead of taking the maximum
    // Card Number, count the length of the leading run of identical Set Number values.
    var raw_values = set_number_question.variables[0].rawValues;
    var setn1 = raw_values[0];
    var alt_per_question = 1;
    for (var i = 1; i < raw_values.length; i++) {
        if (raw_values[i] == setn1)
            alt_per_question++;
        else
            break;
    }

    // Get the number of questions: the largest Set Number value
    var n_questions = 0;
    for (var i = 0; i < raw_values.length; i++)
        if (n_questions < raw_values[i])
            n_questions = raw_values[i];

    // The Set Number sequence (e.g. 1,1,2,2,3,3,1,1,2,2,... for alt_per_question = 2,
    // n_questions = 3) is deliberately not validated: such a check fails when a
    // respondent didn't answer a question, which can be safely ignored.

    // Respondent and Conjoint data set Response ID question
    var respondent_response_id = "`" + respondent_data_file.name + "`$Questions$`Response ID`";
    var conjoint_response_id = "`" + conjoint_data_file.name + "`$Questions$`Response ID`";

    // R assignments shared by every generated expression
    var conjoint_r_prefix = stringToRName(conjoint_data_file.fileName) + "$Questions$";
    var r_inputs = "id.respondent <- " + respondent_response_id + "\n" +
        "id.conjoint <- " + conjoint_response_id + "\n" +
        "design.set.number <- " + conjoint_r_prefix + stringToRName(set_number_question.name) + "\n" +
        "alternative <- " + conjoint_r_prefix + stringToRName(card_number_question.name) + "\n" +
        "score <- " + conjoint_r_prefix + stringToRName(score_question.name) + "\n";

    // Move choices to the respondent data set: one R variable per question,
    // each inserted after the previously created one.
    var previous_variable = null;
    for (var i = 0; i < n_questions; i++)
    {
        var expr = "q.idx <- " + (i + 1) + "\n" +
            "alt.per.question <- " + alt_per_question + "\n" +
            "lvls <- 1:alt.per.question\n" +
            r_inputs +
            "id <- id.conjoint[design.set.number == q.idx & alternative == 1]\n" +
            "choice <- numeric(length(id))\n" +
            "choice <- as.integer(score[design.set.number == q.idx] == '100')\n" +
            "choice <- matrix(choice, ncol = alt.per.question, byrow = TRUE)\n" +
            "choice[which(rowSums(choice) != 1), ] <- NA\n" +
            "choice <- rowSums(choice * col(choice))\n" +
            "choice <- choice[match(id.respondent, as.character(id)) ]\n" +
            "choice <- factor(choice, levels = lvls)";
        var choice_name = preventDuplicateVariableName(respondent_data_file, "Choice");
        var choice_label = preventDuplicateQuestionName(respondent_data_file, "Choice " + (i + 1) + " from " + conjoint_data_file.name);
        try {
            previous_variable = respondent_data_file.newRVariable(expr,
                     choice_name, choice_label, previous_variable);
        } catch (e)
        {
            log("Could not create Choice variable: " + e);
            return false;
        }
    }

    // Create version variable
    var expr_version = "n.q <- " + n_questions + "\n" +
        "alt.per.question <- " + alt_per_question + "\n" +
        r_inputs + "\n" +
        "q.idx <- 1\n" +
        "id <- id.conjoint[design.set.number == q.idx & alternative == 1]\n" +
        "choice <- numeric(length(id))\n" +
        "choice <- as.integer(score[design.set.number == q.idx] == '100')\n" +
        "choice <- matrix(choice, ncol = alt.per.question, byrow = TRUE)\n" +
        "choice[which(rowSums(choice) != 1), ] <- NA\n" +
        "choice <- rowSums(choice * col(choice))\n" +
        "choice <- choice[match(id.respondent, as.character(id)) ]\n" +
        "non.missing.idx <- !is.na(choice)\n\n" +
        // seq_len(n.q)[-1] is 2..n.q and is empty when n.q is 1, whereas
        // 2:n.q would count down (2, 1) in that case.
        "for (q.idx in seq_len(n.q)[-1]) {\n" +
        "     id <- id.conjoint[design.set.number == q.idx & alternative == 1]\n" +
        "     choice <- as.integer(score[design.set.number == q.idx] == '100')\n" +
        "     choice <- matrix(choice, ncol = alt.per.question, byrow = TRUE)\n" +
        "     choice[which(rowSums(choice) != 1), ] <- NA\n" +
        "     choice <- rowSums(choice * col(choice))\n" +
        "     choice <- choice[match(id.respondent, as.character(id)) ]\n" +
        "     non.missing.idx <- non.missing.idx & !is.na(choice)\n" +
        "}\n\n" +
        "version <- suppressWarnings(as.numeric(id.respondent))\n" +
        "# assign dummy version number for missing values to prevent error when checking design in\n" +
        "#   choice model code; will be ignored during model fitting\n" +
        "version[!non.missing.idx] <- as.numeric(id.conjoint[1])\n" +
        "version";

    var version_name = preventDuplicateVariableName(respondent_data_file, "Version");
    var version_label = preventDuplicateQuestionName(respondent_data_file, "Version" + " from " + conjoint_data_file.name);
    try {
        respondent_data_file.newRVariable(expr_version, version_name, version_label, previous_variable);
    } catch (e) {
        log("Could not create Version variable: " + e);
        return false;
    }

    // The confirmation is only logged in Q, not in Displayr.
    if (!is_displayr)
        log("Questions containing the choices and the respondent versions have been added to the respondent data set.");
}

// Run the conversion as soon as the QScript is executed.
main();

See also