QScript Functions for Fixing Truncated Labels

From Q
Jump to navigation Jump to search

To make these functions available when writing a QScript or Rule see JavaScript Reference. These functions are designed to assist with the fixing of truncated variable labels.

longestCommonLabelSuffix(question, use_source)

Examine the set of suffixes for the labels in the question and return the longest one when there is common text in the suffixes. The longest suffix is a good candidate for a question name when the labelling convention has the item label followed by the name of the question. use_source is a boolean flag to specify whether the function should examine the Source Labels or the current labels of each variable.

shortestCommonTruncatedSuffix(labels)

Return the shortest non-empty string which is common at the end of all of the input labels, excluding any suffixes that are empty (which happens when the suffix has been completely truncated).

longestEndSegment(label_1, label_2)

Find the longest end-segment of label_2 in label_1. An end-segment is a substring that appears at the end of the string.

fixLabelTruncation(question, truncation_string)

Remove text starting with truncation_string in each label of each variable in the input question.

labelsAreTruncated(labels)

If the labels contain common text that is truncated then this function returns a string that can be used to split the common suffix from each label, otherwise it returns null.

Source Code

includeWeb('QScript Utility Functions');

// Examine the set of suffixes for the labels in the question.
// The longest suffix is a good candidate for a question name
// when the labeling convention has the item label followed
// by the name of the question.
//
// question:   object whose `variables` array supplies the labels.
// use_source: when truthy each variable's `sourceLabel` is examined,
//             otherwise its `label`.
// Returns the longest suffix, or "" when no common suffix text is found
// or when the candidate suffixes are not consistent with one another.
function longestCommonLabelSuffix(question, use_source) {
    var labels = question.variables.map(function (v) {
        return use_source ? v.sourceLabel : v.label;
    });
    var shortest_common = shortestCommonTruncatedSuffix(labels);

    // No common suffix text found
    if (shortest_common.length === 0)
        return "";

    // The proposed suffix of a label is everything from the first occurrence
    // of the common text onwards. Taking the whole tail (rather than only the
    // second piece of a split) keeps the full suffix when the common text
    // occurs more than once in a label. A label that does not contain the
    // common text contributes just the common text itself.
    var suffixes = labels.map(function (label) {
        var start = label.indexOf(shortest_common);
        return start === -1 ? shortest_common : label.substring(start);
    });

    // Sort the suffixes from longest to shortest and check that each shorter
    // suffix is the beginning of the previous one, to make sure that we have
    // not accidentally found a common string which does not really delimit
    // the suffix.
    suffixes.sort(function (a, b) {
        return b.length - a.length;
    });
    for (var j = 1; j < suffixes.length; j++) {
        if (suffixes[j - 1].indexOf(suffixes[j]) !== 0)
            return "";
    }

    // Return the longest suffix
    return suffixes[0];
}

// Find the shortest common string near the end of the given variable labels.
// The returned string is guaranteed to be an end-segment of at least one label.
// Labels whose suffix has been truncated off entirely contribute no candidate,
// so empty suffixes are excluded.
//
// labels: array of label strings.
// Returns the shortest acceptable candidate, or "" when there is none.
function shortestCommonTruncatedSuffix(labels) {

    // Decide whether a candidate truly marks the start of the suffix for
    // this set of labels. The idea is that the text following the candidate
    // in each label should overlap: ordered from longest to shortest, every
    // remainder must begin with the next, shorter one.
    function checkSuffix(suffix) {
        // Very short candidates are most likely two labels coincidentally
        // ending the same way
        if (suffix.length < 4)
            return false;

        var containing = 0;
        var remainders = [];
        for (var i = 0; i < labels.length; i++) {
            var label = labels[i];
            // A candidate that is an entire label cannot delimit a suffix
            if (label === suffix)
                return false;
            var at = label.indexOf(suffix);
            if (at === -1) {
                remainders.push("");
            } else {
                containing++;
                // Text after the first occurrence of the candidate
                remainders.push(label.substring(at + suffix.length));
            }
        }

        // The candidate must occur in at least a third of the labels
        if (containing < labels.length / 3)
            return false;

        // Longest remainder first
        remainders.sort(function (x, y) { return y.length - x.length; });
        for (var k = 1; k < remainders.length; k++) {
            var longer = remainders[k - 1];
            var shorter = remainders[k];
            // The longer remainder has to start with the shorter one
            if (longer.substring(0, shorter.length) !== shorter)
                return false;
        }
        // This candidate splits all labels in the right way
        return true;
    }

    // Collect the distinct, non-empty strings shared at the end of each
    // ordered pair of labels, allowing for the labels being truncated
    var candidates = [];
    for (var a = 0; a < labels.length; a++) {
        for (var b = 0; b < labels.length; b++) {
            var shared = longestEndSegment(labels[a], labels[b]);
            if (shared.length > 0 && candidates.indexOf(shared) === -1)
                candidates.push(shared);
        }
    }

    // Try the candidates from shortest to longest and keep the first that fits
    candidates.sort(function (x, y) { return x.length - y.length; });
    for (var c = 0; c < candidates.length; c++) {
        if (checkSuffix(candidates[c]))
            return candidates[c];
    }

    // Common suffix not found
    return "";
}

// Find the longest end-segment of label_2 that occurs anywhere in label_1.
// An end-segment is a substring that appears at the end of the string.
// Only end-segments of two or more characters are tried, so the result is
// either "" (no match) or at least two characters long.
function longestEndSegment(label_1, label_2) {
    var end = label_2.length;
    var best = "";
    // Grow the end-segment one character at a time towards the start of
    // label_2 and stop at the first one that label_1 does not contain.
    var start = end - 2;
    while (start >= 0) {
        var tail = label_2.substring(start, end);
        if (label_1.indexOf(tail) === -1)
            break;
        best = tail;
        start--;
    }
    return best;
}


// Remove truncated text from the labels of the variables in the input
// question, beginning with truncation_string.
//
// question:          object whose `variables` each have a `label` that is
//                    rewritten in place.
// truncation_string: text marking where the part to remove starts. Each
//                    label is cut at the first occurrence; labels that do
//                    not contain it keep their text.
// When truncation_string is empty, null or undefined there is nothing to
// cut, so the labels are left untouched.
function fixLabelTruncation(question, truncation_string) {
    // Splitting on "" breaks a label into single characters (leaving only the
    // first one, or undefined for an empty label) and splitting on null
    // splits on the text "null", so bail out instead of damaging the labels.
    if (truncation_string === null || truncation_string === undefined || truncation_string === "")
        return;

    question.variables.forEach(function (v) {
        v.label = v.label.split(truncation_string)[0];
    });
}

// Report whether the given variable labels carry a truncated common suffix.
// Returns the string which can be used to split the truncated part off each
// label, or null when no common suffix start is found or when the text that
// follows it is the same in every label (i.e. nothing has been cut short).
function labelsAreTruncated(labels) {
    var suffix_start = shortestCommonTruncatedSuffix(labels);
    if (suffix_start.length === 0)
        return null;

    // Text following the first occurrence of suffix_start in a label,
    // or "" when the label does not contain it.
    function textAfter(label) {
        var at = label.indexOf(suffix_start);
        return at === -1 ? "" : label.substring(at + suffix_start.length);
    }

    var remainders = labels.map(textAfter);
    for (var i = 1; i < remainders.length; i++) {
        // A remainder that differs from the first means some label was cut short
        if (remainders[i] !== remainders[0])
            return suffix_start;
    }

    // Not truncated, all suffixes the same
    return null;
}


See also