Text Analysis - Sentiment

From Q
Jump to navigation Jump to search
Related Videos

Chapter within Text Analysis in Q5 (Video)

 

Save a variable that assigns a score to each text response in a set, in an attempt to quantify how positive or negative each response is

Generate a variable that assigns a score to each text response in a set, in an attempt to quantify how positive or negative each response is. This can be done based on raw text in the data set, or it can be based on text which has been processed using Text Analysis - Advanced - Setup Text Analysis (Insert > Text Analysis > Advanced > Setup Text Analysis).

This blog post describes how sentiment scores can be visualized with word clouds.

Text entries which were originally blank will get a missing value for the sentiment score and so will be excluded from the base. You can recode them in the usual way, by right-clicking the table with your sentiment scores and selecting Values (...), and then changing the value from NaN to 0.

Background

This R item uses dictionaries of positive and negative English-language words to generate the score for each response. Positive words in the text each add a score of 1, negative words add a score of -1, and the final score is obtained by summing these up. The scoring heuristic also attempts to identify when sentiment has been negated; for example, "not good" would generate a score of -1 instead of a score of 1.

Usage

To generate scores in the data set, select either a SUMMARY table showing Text data, a variable in the Data tree containing Text data, or a text-processing item generated in your report using Text Analysis - Advanced - Setup Text Analysis (Insert > Text Analysis > Advanced > Setup Text Analysis).

Code

// Save Sentiment Scores
//
// Inputs accepted by main():
//   1. A Text question - selected in Data, selected on the page, or shown
//      in a table on the currently selected page.
//   2. An R item. main() requires its output classes to include "wordBag";
//      any further type problems are reported by the R code itself.

// Load the shared QScript libraries by name.
includeWeb("QScript Utility Functions");
includeWeb("QScript Functions to Generate Outputs");
includeWeb("QScript R Output Functions");
includeWeb("QScript Selection Functions");

// Refuse to run when the file format version is below 9.11;
// otherwise hand over to main().
if (fileFormatVersion() < 9.11) {
    log("This script requires a newer version.");
} else {
    main();
}
 
/**
 * Creates a sentiment-score R variable from the user's current selection.
 *
 * The input is resolved in this priority order:
 *   1. exactly one selected R Output whose output classes include "wordBag";
 *   2. exactly one selected Text question;
 *   3. exactly one candidate in the group of the first selected report item:
 *      either a Table whose primary question is Text, or an error-free
 *      R Output whose output classes include "wordBag".
 * Any other selection logs a message and aborts.
 *
 * The new variable is defined by an R expression that loads flipTextAnalysis
 * and calls SaveNetSentimentScores on the input. It is added to the source
 * question's data file, moved directly after the source question's first
 * variable and, outside web mode, shown in a new group of summary tables
 * whose first item becomes the selection.
 *
 * @returns {boolean} true when the variable was created; false when the
 *     selection was unusable or the variable could not be created (a message
 *     is logged in both failure cases).
 */
function main() {

    var bad_selection_message = "Select either text in a Table, text in Data, or an item that has been created using Text Analysis > Setup Text Analysis.";
    var web_mode = (!!Q.isOnTheWeb && Q.isOnTheWeb());

    // Figure out what input the user has selected.
    // R items take precedence over questions that
    // are selected in the Data or in the selected
    // table, but they must have the right class.
    var selected_r_items = project.report.selectedItems().filter(function (item) { return item.type === "R Output"; });
    var selected_text_questions = project.report.selectedQuestions().filter(function (q) { return q.questionType === "Text"; });

    // Fallback candidates: text tables and wordBag items that sit in the
    // same group as the first selected report item.
    var selected_item = project.report.selectedRaw()[0];
    var text_on_page = [];
    var word_bags_on_page = [];
    if (typeof selected_item !== "undefined") {
        var selected_group = selected_item.type === "ReportGroup" ? selected_item : selected_item.group;
        text_on_page = selected_group.subItems
            .filter(function (item) {
                return item.type === "Table"
                    && item.primary != null
                    && item.primary.questionType === "Text";
            })
            .map(function (table) { return table.primary; });
        word_bags_on_page = selected_group.subItems.filter(function (item) {
            return item.type === "R Output"
                && item.error === null
                && item.outputClasses !== null
                && item.outputClasses.indexOf("wordBag") > -1;
        });
    }

    var selected_object;
    if (selected_r_items.length > 1) {
        log("Too many inputs selected. " + bad_selection_message);
        return false;
    } else if (selected_r_items.length === 1) {
        selected_object = selected_r_items[0];
        if (!checkROutputIsValid(selected_object))
            return false;
        if (selected_object.outputClasses.indexOf("wordBag") === -1) {
            log(bad_selection_message);
            return false;
        }
    } else if (selected_text_questions.length > 1) {
        log("Too many inputs selected. " + bad_selection_message);
        return false;
    } else if (selected_text_questions.length === 1) {
        selected_object = selected_text_questions[0];
    } else if (text_on_page.length + word_bags_on_page.length === 1) {
        // Exactly one candidate on the page: use whichever kind it is.
        selected_object = word_bags_on_page.length === 1 ? word_bags_on_page[0] : text_on_page[0];
    } else {
        log(bad_selection_message);
        return false;
    }

    // Figure out the source question so we can put the new
    // variable below it, and to disambiguate in the case of
    // multiple data files.
    var is_question = selected_object.type === "Question";
    var source_question = is_question ? selected_object : selected_object.getInput("formtextvar");
    if (source_question == null) {
        // Without a source question there is no data file to add the
        // variable to and nowhere to place it, so stop with a message
        // instead of failing on a null dereference below.
        log("Sentiment scores could not be created from this item: its source text variable could not be found.");
        return false;
    }

    // The new variable always belongs in the source question's own data
    // file, whether the project has one data file or several.
    var target_data_file = source_question.dataFile;

    var input_string = is_question
        ? generateDisambiguatedVariableName(selected_object.variables[0])
        : stringToRName(selected_object.referenceName);
    var r_expression = "library(flipTextAnalysis)\r\n"
                    +  "sentiment.scores = SaveNetSentimentScores(" + input_string + ", check.simple.suffixes = TRUE, blanks.as.missing = TRUE)";

    var new_q_name = preventDuplicateQuestionName(target_data_file, "Sentiment scores from " + selected_object.name);
    var new_var_name = is_question ? selected_object.variables[0].name : selected_object.name;
    new_var_name = cleanVariableName(new_var_name);
    new_var_name = preventDuplicateVariableName(target_data_file, new_var_name + "_sentiment");

    var new_var;
    try {
        new_var = target_data_file.newRVariable(r_expression, new_var_name, new_q_name, null);
    } catch (e) {
        log("Sentiment scores could not be created from this item: " + e);
        return false;
    }
    var new_question = new_var.question;

    // Move the new variable below its input
    target_data_file.moveAfter([new_var], source_question.variables[0]);

    // Generating outputs (skipped in web mode)
    if (!web_mode) {
        var new_group = generateGroupOfSummaryTables(new_q_name, [new_question]);
        project.report.setSelectedRaw([new_group.subItems[0]]);
    }

    return true;
}