bvaughn / react-highlight-words

React component to highlight words within a larger body of text
https://bvaughn.github.io/react-highlight-words/
MIT License
2.16k stars 170 forks source link

Feature proposal : allow including language specific accents as part of a highlight match #118

Closed seedy closed 4 months ago

seedy commented 4 months ago

Feature proposal

Need

I'd like my Highlighter to match characters with or without language specific accents.

Current behavior

Search words do not match text that contains language-specific accents (and vice versa).

repro: https://codesandbox.io/p/sandbox/react-highlight-words-example-forked-kkk5cz

Expected behavior

The search word `The` should match `Thé`, `Thè`, `Thê`, etc.

sergei-startsev commented 4 months ago

@seedy Language accent analysis isn't a trivial task, and I'd rather keep it beyond the library's scope to ensure it remains compact and customizable.

boehm-e commented 3 months ago

For those interested in this feature, here is a working example of how to achieve it:


/**
 * Strips accents from `text`.
 *
 * The string is first put into Unicode NFD form, which splits a precomposed
 * letter such as "é" into its base letter plus a combining mark; every code
 * unit in the U+0300–U+036F range is then deleted.
 *
 * @param text - The string to strip.
 * @returns The NFD-decomposed string with U+0300–U+036F characters removed.
 */
const removeDiacritics = (text: string) => {
  const decomposed = text.normalize("NFD");
  const combiningMarks = /[\u0300-\u036f]/g;
  return decomposed.replace(combiningMarks, "");
};

/**
 * Backslash-escapes characters of `text` so it can be embedded literally in a
 * regular-expression source string.
 *
 * Escaped characters: `- [ ] { } ( ) * + ? . , \ ^ $ | #` and any whitespace.
 *
 * @param text - Literal text to escape.
 * @returns `text` with a backslash inserted before each escaped character.
 */
const escapeRegExp = (text: string) => {
  const specialCharacters = /[-[\]{}()*+?.,\\^$|#\s]/g;
  return text.replace(specialCharacters, (character) => "\\" + character);
};

/**
 * Accent-insensitive chunk finder, intended to be passed as the `findChunks`
 * prop of `<Highlighter>`.
 *
 * Both the text and every search word are run through `removeDiacritics`
 * before matching, so "The" matches "Thé" and "Thé" matches "The". Matching
 * is always case-insensitive.
 *
 * Matches are located in the accent-free text, but the returned offsets index
 * into the ORIGINAL `data.textToHighlight`. The two differ whenever
 * normalization changes the string length (text that is already decomposed,
 * Hangul syllables, ...), so each match is mapped back before being returned.
 * A combining mark that directly follows the last matched character is
 * included in the chunk so an accent is never split from its base letter.
 *
 * @param data - Options object supplied by the highlighter. Only
 *   `textToHighlight` (string) and `searchWords` (array) are read. Non-string
 *   search words are ignored.
 * @returns Chunks as `{ start, end }` UTF-16 offsets into the original text
 *   (`end` exclusive). A match fully covered by an earlier chunk is dropped.
 */
const findNormalizedChunks = (data: any) => {
  const text: string = data.textToHighlight;

  // Pass 1: for every UTF-16 unit of the NFD form, remember which slice of
  // the original text it came from. Canonical decomposition works one code
  // point at a time, so decomposing each code point on its own yields the same
  // number of units as decomposing the whole string.
  const originStart: number[] = [];
  const originEnd: number[] = [];
  let offset = 0;
  for (const codePoint of text) {
    const next = offset + codePoint.length;
    const unitCount = codePoint.normalize("NFD").length;
    for (let i = 0; i < unitCount; i++) {
      originStart.push(offset);
      originEnd.push(next);
    }
    offset = next;
  }

  // Pass 2: build the accent-free text, keeping the origin slice of each unit
  // that survives. `removeDiacritics` decides what is stripped so this stays in
  // sync with how the search words are normalized below.
  const decomposed = text.normalize("NFD");
  let normalizedText = "";
  const keptStart: number[] = [];
  const keptEnd: number[] = [];
  for (let i = 0; i < decomposed.length; i++) {
    const unit = decomposed.charAt(i);
    if (removeDiacritics(unit) === "") continue;
    normalizedText += unit;
    keptStart.push(originStart[i]);
    keptEnd.push(originEnd[i]);
  }

  const chunks: { start: number; end: number }[] = [];
  for (const rawWord of data.searchWords) {
    // Only plain strings can be normalized and escaped.
    if (typeof rawWord !== "string") continue;

    const word = removeDiacritics(rawWord.trim());
    if (!word) continue; // Skip empty, whitespace-only or accent-only words

    // The pattern is a non-empty literal, so every match has length >= 1 and
    // `exec` always advances `lastIndex` by itself.
    const regex = new RegExp(escapeRegExp(word), "gi");
    let match: RegExpExecArray | null;
    while ((match = regex.exec(normalizedText)) !== null) {
      const lastUnit = regex.lastIndex - 1;
      const start = keptStart[match.index];
      // Stretch up to the next surviving character (or the end of the text)
      // so trailing combining marks stay inside the highlight.
      const nextStart =
        lastUnit + 1 < keptStart.length ? keptStart[lastUnit + 1] : text.length;
      const end = Math.max(keptEnd[lastUnit], nextStart);

      // Skip matches already covered by a previously found chunk.
      if (!chunks.some((c) => c.start <= start && c.end >= end)) {
        chunks.push({ start, end });
      }
    }
  }

  return chunks;
};

// Usage inside a component's render: `query` and `text` are expected to be
// provided by the surrounding component (not shown here).
// The query has its accents stripped and is split on single spaces into
// search words; `findChunks` replaces the library's default matching with the
// accent-insensitive finder, and `autoEscape` is off because the finder
// escapes the words itself.
return (<Highlighter
  highlightClassName="YourHighlightClass"
  searchWords={removeDiacritics(query).split(" ").map(q => q.trim())}
  autoEscape={false}
  textToHighlight={text}
  findChunks={findNormalizedChunks}
/>)