Two string comparison

Hi all,
Is there a way in glide to implement the Levenshtein comparison? I have tables with wrongly spelled cities that i need to match.
Thx

I’m pretty unsure what that is but you can try the JavaScript column. Add a JavaScript column, set the code to this example I got from ChatGPT, and set p1 and p2 to string 1 and string 2

function levenshtein(a, b) {
  var tmp, i, j, alen = a.length, blen = b.length, row = Array(alen);
  if (alen === 0) { return blen; }
  if (blen === 0) { return alen; }
  for (i = 0; i < alen; i++) { row[i] = i; }
  for (i = 1; i <= blen; i++) {
    tmp = i;
    for (j = 1; j <= alen; j++) {
      const cost = (a[j - 1] === b[i - 1]) ? 0 : 1;
      const current = Math.min(row[j - 1] + 1, tmp + 1, row[j] + cost);
      row[j - 1] = tmp;
      tmp = current;
    }
  }
  return row[alen - 1];
}

// Return the Levenshtein distance between the two inputs
return levenshtein(p1, p2);
1 Like

Thx. Sounds great but I am a no code type, what do i do with java in glide?

So you need something like a “closest match” relation?

Hmm, see my edited version of the last reply, or try letting chatgpt make it (explain it that it shouldn’t wrap in a function and use return instead of console.log, and tell it about p1 p2 p3 etc)

But I’m a little confused what the Levenshtein comparison is though, if you still need help, could you tell me more what it is? :thinking:

@Robert_Benacka I use it for a Question and Answer based knowledge base. I have a table with RowID, Question, and Answer columns. It’s a little complicated to explain in detail, but basically I build a JSON object of all data from the knowledge base table. Then I pass that into a javascript column along with the search words. The javascript basically uses the levenshtein method to return a comma delimited list of ROWID’S that match along with a acore indicating how much of a match. I’m still tweaking the tolerance numbers to get the best results but it works pretty well for my use case.

It’s nice to be able to perform a search where spelling doesn’t matter and multiple search terms can be in any order.

Below is the javascript that I’m currently using.
p1 is the search phrase
p2 is the JSON object
p3 is the score tolerance, but it defaults to 0.25 if not set.

I added some google-like features, like ‘-’ to exclude words, ‘+’ to require words, quoting text to search exact phrases, etc

The results are scores and RowIDs that I split into arrays to use for a relation to source my collection.

In hindsight I’d maybe approach it a little differently, so it’s not so complicated, but it is definitely possible to do it. I think an easier approach might be use a single value column to place your search phrase on all rows in the table being searched. Then add a template column merging all columns you want to search. Then add a javascript column to compare the search terms and return a score. Finally you could filter your collection to only show results above a certain score, and/or sort the rows based on that score.

It’s definitely something I want to sit down and play with some more when I have time…maybe try some other search algorithms. It does work pretty decent though.

function fuzzySearch(jsonData, searchPhrase) {
  // Function to calculate Levenshtein distance
  function levenshteinDistance(a, b) {
    const matrix = [];
    for (let i = 0; i <= b.length; i++) {
      matrix[i] = [i];
    }
    for (let j = 0; j <= a.length; j++) {
      matrix[0][j] = j;
    }

    for (let i = 1; i <= b.length; i++) {
      for (let j = 1; j <= a.length; j++) {
        if (b[i - 1] === a[j - 1]) {
          matrix[i][j] = matrix[i - 1][j - 1];
        } else {
          matrix[i][j] = Math.min(
            matrix[i - 1][j - 1] + 1, // Substitution
            matrix[i][j - 1] + 1,     // Insertion
            matrix[i - 1][j] + 1      // Deletion
          );
        }
      }
    }

    return matrix[b.length][a.length];
  }

  // Helper to calculate the similarity score
  function similarityScore(search, text) {
    const lowerSearch = search.toLowerCase();
    const lowerText = text.toLowerCase();
    const distance = levenshteinDistance(lowerSearch, lowerText);

    // Base similarity score
    let score = 1 - distance / Math.max(search.length, text.length);

    // Boost score for standalone word matches
    const searchWords = lowerSearch.split(/\s+/);
    const textWords = lowerText.split(/\s+/);

    let wordMatchScore = 0;
    let standaloneMatchScore = 0;

    searchWords.forEach(searchWord => {
      const regex = new RegExp(`\\b${searchWord}\\b`, "i"); // Match standalone word
      if (regex.test(text)) {
        standaloneMatchScore += 0.3; // Higher boost for standalone matches
      } else if (text.includes(searchWord)) {
        wordMatchScore += 0.1; // Lower boost for substring matches
      }
    });

    // Combine scores with weightage
    score += wordMatchScore + standaloneMatchScore;

    return Math.min(score, 1); // Ensure the score doesn't exceed 1
  }

  // Parse the search phrase for special cases
  const quotedPhrases = [];
  const requiredWords = [];
  const excludedWords = [];
  const cleanWords = [];

  searchPhrase.match(/"([^"]+)"|\+(\S+)|-(\S+)|(\S+)/g).forEach(term => {
    if (term.startsWith('"') && term.endsWith('"')) {
      quotedPhrases.push(term.slice(1, -1));
    } else if (term.startsWith('+')) {
      requiredWords.push(term.slice(1));
    } else if (term.startsWith('-')) {
      excludedWords.push(term.slice(1));
    } else {
      cleanWords.push(term);
    }
  });

  // Perform fuzzy search and calculate scores
  const results = jsonData.SearchData
    .map(item => {
  if (!item.Question) {
    item.Question = "default question"; // Default value for Question
  }
  if (!item.Answer) {
    item.Answer = "default answer"; // Default value for Answer
  }
      const question = item.Question.toLowerCase();
      const answer = item.Answer.toLowerCase();

      // Check if item meets quoted phrases, required, and excluded word conditions
      const containsQuoted = quotedPhrases.every(phrase =>
        question.includes(phrase.toLowerCase()) || answer.includes(phrase.toLowerCase())
      );
      const containsRequired = requiredWords.every(word =>
        question.includes(word.toLowerCase()) || answer.includes(word.toLowerCase())
      );
      const containsExcluded = excludedWords.some(word =>
        question.includes(word.toLowerCase()) || answer.includes(word.toLowerCase())
      );

      if (!containsQuoted || !containsRequired || containsExcluded) {
        return null; // Filter out this item
      }

      // Calculate similarity scores
      const searchForScoring = [...cleanWords, ...quotedPhrases, ...requiredWords].join(" ");
      const questionScore = similarityScore(searchForScoring, item.Question);
      const answerScore = similarityScore(searchForScoring, item.Answer);
      const maxScore = Math.max(questionScore, answerScore); // Use the highest score

      return { RowID: item.RowID, score: maxScore };
    })
    .filter(result => result && result.score > scoreTolerance) // Filter out poor matches
    .sort((a, b) => a.score - b.score); // Sort by ascending score

  // Prepare output: Group percents and RowIDs
  const percentGroup = results.map(result => Math.round(result.score * 100).toString()).join(",");
  const rowIDGroup = results.map(result => result.RowID).join(",");

  return `${percentGroup}|${rowIDGroup}`;
}

// Usage
const searchPhrase = p1;
const data = JSON.parse(p2);
const scoreTolerance = ((!p3) ? 0.25 : parseFloat(p3).toFixed(2));

const results = fuzzySearch(data, searchPhrase);

return results;
1 Like