Preliminary Project Setup - Suggest Better Question Names from Source Labels

From Q
Jump to navigation Jump to search

Suggests changes to variable set names for selected variable sets based on the original labels in the data file. This QScript tries to improve the names of the questions in the project by searching through the text of the original labels in the data file. In some cases, the names for multiple response questions supplied in the data file contain less information than the label text, and this QScript can find and replace that information for you. If the extra text is still showing in the labels then this QScript will also tidy it up.

When the script runs you will be asked to specify the Questions you want to try to obtain better names for. The script will then give you a list of potential name improvements, and you can choose which to use. If the suggested names are not meaningful then it is likely that the labels in the file do not contain better information for naming the questions.

Example

In this example we have a question which has been called q10 in the raw data file, but we can see in the Source Label column that the original labels in the data file contain better information about what the question asked, namely the text Q10. Why drinks more than one cola. As this text is more informative, we can use this QScript to rename q10 as Q10. Why drinks more than one cola automatically.

LabelImprovementExample.PNG

Technical details

It is usually better to instruct your data provider to add appropriate question text to the Set Label of the Multiple Response Set in the SPSS file because Q uses this text as the Question Name. In some cases, for example in older data processing software like Quantum, it is not possible to change this, and so this QScript can be used to get a tidier layout in Q. If your data provider does have control over these aspects of the formatting in the SPSS file then you should ask them to use the data file specifications linked here: SPSS Data File Specifications.

This QScript searches the Source Label of each of the variables in the selected question to find text that is common at the start and the end of the label. If the labels have been truncated, which happens with some older data processing software, then this script will identify the longest common label suffix which shares text with the other label suffixes. That is, the script can still work even when the labels are truncated. In most cases, truncated labels in the data file will show up as messy labels in the Variables and Questions tab and on your tables.

How to apply this QScript

  • Start typing the name of the QScript into the Search features and data box in the top right of the Q window.
  • Click on the QScript when it appears in the QScripts and Rules section of the search results.

OR

  • Select Automate > Browse Online Library.
  • Select this QScript from the list.

Customizing the QScript

This QScript is written in JavaScript and can be customized by copying and modifying the JavaScript.

Customizing QScripts in Q4.11 and more recent versions

  • Start typing the name of the QScript into the Search features and data box in the top right of the Q window.
  • Hover your mouse over the QScript when it appears in the QScripts and Rules section of the search results.
  • Press Edit a Copy (bottom-left corner of the preview).
  • Modify the JavaScript (see QScripts for more detail on this).
  • Either:
    • Run the QScript, by pressing the blue triangle button.
    • Save the QScript and run it at a later time, using Automate > Run QScript (Macro) from File.

Customizing QScripts in older versions

  • Copy the JavaScript shown on this page.
  • Create a new text file, giving it a file extension of .QScript. See here for more information about how to do this.
  • Modify the JavaScript (see QScripts for more detail on this).
  • Run the file using Automate > Run QScript (Macro) from File.

JavaScript

// Better question names for questions with more than 1 variable
//
// Sometimes Multiple Response Sets are labeled poorly even when
// the variable labels contain decent question naming information.

includeWeb("QScript Selection Functions");
includeWeb("QScript Functions for Fixing Truncated Labels");


// Run the script and write a one-line outcome to the log: "Finished" when
// main() returns a truthy value, "Cancelled" otherwise.
if (main()) {
    log("QScript Finished.");
} else {
    log("QScript Cancelled.");
}

/**
 * Entry point: proposes longer, more descriptive names for multi-variable
 * questions (called variable sets in Displayr) using text shared by their
 * variables' labels, then applies the proposals the user accepts.
 *
 * Flow:
 *   1. Ask for a data file and keep the questions that are not hidden, are
 *      not banners and contain more than one variable.
 *   2. Let the user choose which of those to examine.
 *   3. For each chosen question take the longer of the common source-label
 *      prefix and the common label suffix, strip leading whitespace and
 *      punctuation, and keep it as a proposal only when it is longer than
 *      the question's current name.
 *   4. Let the user pick the proposals to apply, rename those questions and,
 *      where flagged, call fixLabelTruncation on them.
 *   5. In Q, add a "Renamed Questions" report group containing a summary
 *      text item and one table per renamed question; in Displayr, select the
 *      questions the user originally chose instead.
 *
 * @returns {boolean} true when at least one rename was applied; false when
 *     no suitable questions exist, none were selected, no improvement was
 *     found, or no proposed change was accepted.
 */
function main() {
    // Q.isOnTheWeb may be absent, in which case this evaluates to false.
    var on_web = (!!Q.isOnTheWeb && Q.isOnTheWeb());
    var noun = on_web ? "variable set" : "question";
    var file = requestOneDataFileFromProject(false, true);

    // Only visible, non-banner questions with several variables qualify.
    var eligible = file.questions.filter(function (question) {
        if (question.isHidden || question.isBanner)
            return false;
        return question.variables.length > 1;
    });
    if (eligible.length === 0) {
        log("No appropriate " + noun + "s found.");
        return false;
    }

    var prompt = "Select " + noun + " names to try and improve:";
    var chosen = selectManyQuestions(prompt, eligible, true).questions;
    if (chosen.length === 0) {
        log("No " + noun + "s selected.");
        return false;
    }

    var proposals = [];
    var proposed_names = [];
    chosen.forEach(function (question) {
        var source_labels = question.variables.map(function (variable) {
            return variable.sourceLabel;
        });
        var common_prefix = longestCommonPrefix(source_labels);
        var common_suffix = longestCommonLabelSuffix(question, true);
        var suffix_wins = common_prefix.length < common_suffix.length;

        var candidate;
        if (suffix_wins)
            candidate = common_suffix;
        else
            candidate = common_prefix;

        // Drop any whitespace / punctuation that leads the candidate name.
        candidate = candidate.replace(/^[\s:_\-.*+?^${}()\]\[@#;<>&]+/, "");

        // Only a candidate longer than the current name counts as an improvement.
        if (!(candidate.length > question.name.length))
            return;

        // Avoid clashing with names already in the data file and with names
        // proposed earlier in this run.
        var unique_in_file = preventDuplicateQuestionName(question.dataFile, candidate);
        var unique_name = preventDuplicateString(proposed_names, unique_in_file);

        // If a suffix-derived candidate still appears inside a current
        // variable label, the labels may have been truncated; remember that
        // so the labels can be tidied when the rename is applied.
        var current_labels = question.variables.map(function (variable) {
            return variable.label;
        });
        var maybe_truncated = suffix_wins && current_labels.some(function (label) {
            return label.indexOf(candidate) > -1;
        });
        // NOTE(review): the shape of labelsAreTruncated's result is not visible
        // here; it is passed through unchanged to fixLabelTruncation.
        var truncation_info = maybe_truncated ? labelsAreTruncated(current_labels) : null;

        proposals.push({
            original: question.name,
            improved: unique_name,
            question: question,
            could_be_truncated: maybe_truncated,
            truncated_text: truncation_info
        });
        proposed_names.push(unique_name);
    });

    if (proposals.length === 0) {
        log("Did not find any name improvements.");
        return false;
    }

    // Ask the user which of the proposed renames should be applied.
    var option_lines = proposals.map(function (proposal) {
        return proposal.original + "   --->   " + proposal.improved;
    });
    var picked_indices = selectMany("Select the name changes to make:", option_lines);
    var accepted = picked_indices.map(function (index) {
        return proposals[index];
    });

    if (accepted.length === 0) {
        log("No changes selected.");
        return false;
    }

    // In Q, create the report group and its summary text item before the
    // per-question tables are appended to the group.
    var report_group;
    var summary_item;
    if (!on_web) {
        report_group = project.report.appendGroup();
        report_group.name = "Renamed Questions";
        summary_item = report_group.appendText();
    }

    accepted.forEach(function (change) {
        var renamed = change.question;
        renamed.name = preventDuplicateQuestionName(renamed.dataFile, change.improved);
        if (change.could_be_truncated)
            fixLabelTruncation(renamed, change.truncated_text);
        if (!on_web)
            report_group.appendTable().primary = renamed;
    });

    if (on_web) {
        project.report.setSelectedRaw(chosen);
        return true;
    }

    // Fill the summary item with a heading and an old-name / new-name table.
    var heading = Q.htmlBuilder();
    var body = Q.htmlBuilder();
    heading.appendParagraph("Renamed Questions", { font: 'Tahoma', size: 20 });
    var rows = accepted.map(function (change) {
        return [change.original, change.improved];
    });
    body.appendTable(rows, [20, 60], null, { font: 'Lucida Console', size: 10 });
    summary_item.title = heading;
    summary_item.content = body;
    return true;
}

/**
 * Returns a version of new_string that does not occur in strings.
 *
 * If new_string is not already present it is returned unchanged. Otherwise
 * " 1", " 2", ... is appended in turn until the result is not found in
 * strings. The strings array itself is not modified.
 *
 * @param {Array} strings - values the result must not equal.
 * @param {string} new_string - the preferred string.
 * @returns {string} new_string, or new_string followed by a space and the
 *     first positive integer that makes it unique within strings.
 */
function preventDuplicateString(strings, new_string) {
    var candidate = new_string;
    for (var suffix = 1; strings.indexOf(candidate) !== -1; suffix++) {
        candidate = new_string + " " + suffix;
    }
    return candidate;
}

See also