QScript Functions for Fixing Truncated Labels
To make these functions available when writing a QScript or Rule, see the JavaScript Reference. These functions are designed to assist with fixing truncated variable labels.
longestCommonLabelSuffix(question, use_source)
Examine the set of suffixes for the labels in the question and return the longest one when there is common text in the suffixes. The longest suffix is a good candidate for a question name when the labelling convention has the item label followed by the name of the question. use_source is a boolean flag to specify whether the function should examine the Source Labels or the current labels of each variable.
shortestCommonTruncatedSuffix(labels)
Return the shortest non-empty string which is common at the end of all of the input labels, excluding any suffixes that are empty (which happens when the suffix has been completely truncated).
longestEndSegment(label_1, label_2)
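Find the longest end-segment of label_2 that also occurs somewhere in label_1. An end-segment is a substring that appears at the end of the string. End-segments shorter than two characters are not considered, so an empty string is returned when the last two characters of label_2 do not occur in label_1.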
fixLabelTruncation(question, truncation_string)
Remove text starting with truncation_string in each label of each variable in the input question.
labelsAreTruncated(labels)
If the labels contain common text that is truncated then this function returns a string that can be used to split the common suffix from each label, otherwise it returns null.
Source Code
includeWeb('QScript Utility Functions');
// Examine the set of suffixes for the labels in the question and return the
// longest one. The longest suffix is a good candidate for a question name
// when the labeling convention has the item label followed by the name of
// the question.
//
// question:   object whose `variables` array supplies the labels.
// use_source: when truthy each variable's sourceLabel is examined,
//             otherwise its label.
//
// Returns the longest proposed suffix, or "" when no common suffix text is
// found or when the proposed suffixes are not consistent with one another.
function longestCommonLabelSuffix(question, use_source) {
    var labels = question.variables.map(function (v) {
        return use_source ? v.sourceLabel : v.label;
    });
    var shortest_common = shortestCommonTruncatedSuffix(labels);
    // No common suffix text found
    if (shortest_common.length === 0)
        return "";
    // Proposed suffix for each label: the common text followed by everything
    // after its first occurrence. Re-joining the remaining pieces with the
    // common text keeps the label intact when the common text is repeated;
    // taking only the first piece would cut the suffix short at the repeat.
    // A label that does not contain the common text contributes just the
    // common text itself.
    var suffixes = labels.map(function (label) {
        return shortest_common + label.split(shortest_common).slice(1).join(shortest_common);
    });
    // Sort the suffixes from longest to shortest and check that each shorter
    // suffix is the beginning of the previous one, to make sure that we have
    // not accidentally found a common string which does not really delimit
    // the suffix.
    suffixes.sort(function (a, b) {
        return b.length - a.length;
    });
    for (var j = 1; j < suffixes.length; j++) {
        if (suffixes[j - 1].indexOf(suffixes[j]) !== 0)
            return "";
    }
    // Return the longest suffix
    return suffixes[0];
}
// Find the shortest string that marks the start of the shared text near the
// end of the input labels, allowing for that text to have been truncated in
// some labels.
// Candidates are the non-empty results of longestEndSegment for every ordered
// pair of labels, so the returned string is guaranteed to be an end-segment
// of at least one label. Labels whose shared text has been truncated away
// completely are tolerated (they contribute an empty remainder).
//
// labels: array of label strings.
//
// Returns the shortest acceptable candidate, or "" when there is none.
function shortestCommonTruncatedSuffix(labels) {
    // Text that follows the first occurrence of marker in label, or "" when
    // the label does not contain marker.
    function textAfter(label, marker) {
        var at = label.indexOf(marker);
        return at === -1 ? "" : label.substring(at + marker.length);
    }

    // Decide whether a candidate truly delimits the suffix for this set of
    // labels. The idea is that once every label is cut at the candidate, the
    // pieces left over should nest: each longer piece begins with every
    // shorter piece.
    function delimitsSuffix(candidate) {
        // Very short candidates are most likely two labels ending the same
        // way by coincidence.
        if (candidate.length < 4)
            return false;
        // The candidate must occur in at least a third of the labels.
        var containing = labels.reduce(function (count, label) {
            return label.indexOf(candidate) === -1 ? count : count + 1;
        }, 0);
        if (containing < labels.length / 3)
            return false;
        // A candidate that is an entire label is not a suffix marker.
        var is_whole_label = labels.some(function (label) {
            return label === candidate;
        });
        if (is_whole_label)
            return false;
        // Left-over pieces, longest first; each must start with the next.
        var tails = labels.map(function (label) {
            return textAfter(label, candidate);
        });
        tails.sort(function (a, b) { return b.length - a.length; });
        return tails.every(function (tail, k) {
            return k === 0 || tails[k - 1].indexOf(tail) === 0;
        });
    }

    // Collect the distinct, non-empty strings shared at the end of each
    // ordered pair of labels.
    var candidates = [];
    labels.forEach(function (outer) {
        labels.forEach(function (inner) {
            var shared = longestEndSegment(outer, inner);
            if (shared.length > 0 && candidates.indexOf(shared) === -1)
                candidates.push(shared);
        });
    });

    // Try the candidates from shortest to longest and keep the first that
    // splits the labels in the right way.
    candidates.sort(function (a, b) { return a.length - b.length; });
    var chosen = "";
    candidates.some(function (candidate) {
        if (!delimitsSuffix(candidate))
            return false;
        chosen = candidate;
        return true;
    });
    // "" here means no common suffix was found
    return chosen;
}
// Find the longest end-segment of label_2 that occurs anywhere in label_1.
// An end-segment is a substring that appears at the end of the string.
// The search starts with the last two characters of label_2 and grows the
// segment one character at a time towards the start, stopping at the first
// segment that label_1 does not contain. One-character segments are never
// considered, so "" is returned when label_2 has fewer than two characters
// or when its last two characters do not occur in label_1.
function longestEndSegment(label_1, label_2) {
    var best = "";
    var start = label_2.length - 2;
    while (start >= 0) {
        var candidate = label_2.slice(start);
        // A longer segment cannot match once a shorter one has failed.
        if (label_1.indexOf(candidate) === -1)
            return best;
        best = candidate;
        start -= 1;
    }
    return best;
}
// Remove truncated text from the labels of the variables in the input
// question, beginning with truncation_string.
//
// question:          object whose `variables` array holds the variables to
//                    relabel; each variable's `label` is overwritten in place.
// truncation_string: text marking the start of the part to remove. Each label
//                    keeps only what precedes the first occurrence; labels
//                    that do not contain it are left as they are.
//
// When truncation_string is empty, null or undefined there is nothing to
// locate, so the labels are left untouched. (Splitting on "" would reduce
// every label to its first character, and null would be treated as the
// literal text "null".)
function fixLabelTruncation(question, truncation_string) {
    if (truncation_string === null || truncation_string === undefined || truncation_string === "")
        return;
    question.variables.forEach(function (v) {
        v.label = v.label.split(truncation_string)[0];
    });
}
// Decide whether the input labels carry common trailing text that has been
// truncated.
// Returns the string which can be used to split the truncated part off each
// label, or null when shortestCommonTruncatedSuffix finds no common text or
// when the text after it is the same in every label (nothing is truncated).
function labelsAreTruncated(labels) {
    var marker = shortestCommonTruncatedSuffix(labels);
    if (marker.length === 0)
        return null;
    // Text following the first occurrence of the marker in each label
    // ("" for labels that do not contain it).
    var tails = labels.map(function (label) {
        var at = label.indexOf(marker);
        return at === -1 ? "" : label.substring(at + marker.length);
    });
    // Any difference between the tails means at least one was cut short.
    var some_tail_differs = tails.some(function (tail) {
        return tail !== tails[0];
    });
    return some_tail_differs ? marker : null;
}