Hi all,
Is there a way in glide to implement the Levenshtein comparison? I have tables with wrongly spelled cities that i need to match.
Thx
Iâm pretty unsure what that is but you can try the JavaScript column. Add a JavaScript column, set the code to this example I got from ChatGPT, and set p1 and p2 to string 1 and string 2
function levenshtein(a, b) {
var tmp, i, j, alen = a.length, blen = b.length, row = Array(alen);
if (alen === 0) { return blen; }
if (blen === 0) { return alen; }
for (i = 0; i < alen; i++) { row[i] = i; }
for (i = 1; i <= blen; i++) {
tmp = i;
for (j = 1; j <= alen; j++) {
const cost = (a[j - 1] === b[i - 1]) ? 0 : 1;
const current = Math.min(row[j - 1] + 1, tmp + 1, row[j] + cost);
row[j - 1] = tmp;
tmp = current;
}
}
return row[alen - 1];
}
// Return the Levenshtein distance between the two inputs
return levenshtein(p1, p2);
Thx. Sounds great but I am a no code type, what do i do with java in glide?
So you need something like a âclosest matchâ relation?
Hmm, see my edited version of the last reply, or try letting chatgpt make it (explain it that it shouldnât wrap in a function and use return instead of console.log, and tell it about p1 p2 p3 etc)
But Iâm a little confused what the Levenshtein comparison is though, if you still need help, could you tell me more what it is?
@Robert_Benacka I use it for a Question and Answer based knowledge base. I have a table with RowID, Question, and Answer columns. Itâs a little complicated to explain in detail, but basically I build a JSON object of all data from the knowledge base table. Then I pass that into a javascript column along with the search words. The javascript basically uses the levenshtein method to return a comma delimited list of ROWIDâS that match along with a acore indicating how much of a match. Iâm still tweaking the tolerance numbers to get the best results but it works pretty well for my use case.
Itâs nice to be able to perform a search where spelling doesnât matter and multiple search terms can be in any order.
Below is the javascript that Iâm currently using.
p1 is the search phrase
p2 is the JSON object
p3 is the score tolerance, but it defaults to 0.25 if not set.
I added some google-like features, like â-â to exclude words, â+â to require words, quoting text to search exact phrases, etc
The results are scores and RowIDs that I split into arrays to use for a relation to source my collection.
In hindsight Iâd maybe approach it a little differently, so itâs not so complicated, but it is definitely possible to do it. I think an easier approach might be use a single value column to place your search phrase on all rows in the table being searched. Then add a template column merging all columns you want to search. Then add a javascript column to compare the search terms and return a score. Finally you could filter your collection to only show results above a certain score, and/or sort the rows based on that score.
Itâs definitely something I want to sit down and play with some more when I have timeâŚmaybe try some other search algorithms. It does work pretty decent though.
function fuzzySearch(jsonData, searchPhrase) {
// Function to calculate Levenshtein distance
function levenshteinDistance(a, b) {
const matrix = [];
for (let i = 0; i <= b.length; i++) {
matrix[i] = [i];
}
for (let j = 0; j <= a.length; j++) {
matrix[0][j] = j;
}
for (let i = 1; i <= b.length; i++) {
for (let j = 1; j <= a.length; j++) {
if (b[i - 1] === a[j - 1]) {
matrix[i][j] = matrix[i - 1][j - 1];
} else {
matrix[i][j] = Math.min(
matrix[i - 1][j - 1] + 1, // Substitution
matrix[i][j - 1] + 1, // Insertion
matrix[i - 1][j] + 1 // Deletion
);
}
}
}
return matrix[b.length][a.length];
}
// Helper to calculate the similarity score
function similarityScore(search, text) {
const lowerSearch = search.toLowerCase();
const lowerText = text.toLowerCase();
const distance = levenshteinDistance(lowerSearch, lowerText);
// Base similarity score
let score = 1 - distance / Math.max(search.length, text.length);
// Boost score for standalone word matches
const searchWords = lowerSearch.split(/\s+/);
const textWords = lowerText.split(/\s+/);
let wordMatchScore = 0;
let standaloneMatchScore = 0;
searchWords.forEach(searchWord => {
const regex = new RegExp(`\\b${searchWord}\\b`, "i"); // Match standalone word
if (regex.test(text)) {
standaloneMatchScore += 0.3; // Higher boost for standalone matches
} else if (text.includes(searchWord)) {
wordMatchScore += 0.1; // Lower boost for substring matches
}
});
// Combine scores with weightage
score += wordMatchScore + standaloneMatchScore;
return Math.min(score, 1); // Ensure the score doesn't exceed 1
}
// Parse the search phrase for special cases
const quotedPhrases = [];
const requiredWords = [];
const excludedWords = [];
const cleanWords = [];
searchPhrase.match(/"([^"]+)"|\+(\S+)|-(\S+)|(\S+)/g).forEach(term => {
if (term.startsWith('"') && term.endsWith('"')) {
quotedPhrases.push(term.slice(1, -1));
} else if (term.startsWith('+')) {
requiredWords.push(term.slice(1));
} else if (term.startsWith('-')) {
excludedWords.push(term.slice(1));
} else {
cleanWords.push(term);
}
});
// Perform fuzzy search and calculate scores
const results = jsonData.SearchData
.map(item => {
if (!item.Question) {
item.Question = "default question"; // Default value for Question
}
if (!item.Answer) {
item.Answer = "default answer"; // Default value for Answer
}
const question = item.Question.toLowerCase();
const answer = item.Answer.toLowerCase();
// Check if item meets quoted phrases, required, and excluded word conditions
const containsQuoted = quotedPhrases.every(phrase =>
question.includes(phrase.toLowerCase()) || answer.includes(phrase.toLowerCase())
);
const containsRequired = requiredWords.every(word =>
question.includes(word.toLowerCase()) || answer.includes(word.toLowerCase())
);
const containsExcluded = excludedWords.some(word =>
question.includes(word.toLowerCase()) || answer.includes(word.toLowerCase())
);
if (!containsQuoted || !containsRequired || containsExcluded) {
return null; // Filter out this item
}
// Calculate similarity scores
const searchForScoring = [...cleanWords, ...quotedPhrases, ...requiredWords].join(" ");
const questionScore = similarityScore(searchForScoring, item.Question);
const answerScore = similarityScore(searchForScoring, item.Answer);
const maxScore = Math.max(questionScore, answerScore); // Use the highest score
return { RowID: item.RowID, score: maxScore };
})
.filter(result => result && result.score > scoreTolerance) // Filter out poor matches
.sort((a, b) => a.score - b.score); // Sort by ascending score
// Prepare output: Group percents and RowIDs
const percentGroup = results.map(result => Math.round(result.score * 100).toString()).join(",");
const rowIDGroup = results.map(result => result.RowID).join(",");
return `${percentGroup}|${rowIDGroup}`;
}
// Usage
const searchPhrase = p1;
const data = JSON.parse(p2);
const scoreTolerance = ((!p3) ? 0.25 : parseFloat(p3).toFixed(2));
const results = fuzzySearch(data, searchPhrase);
return results;