spencermountain / compromise

modest natural-language processing
http://compromise.cool
MIT License
11.3k stars 644 forks source link

[Feature Request]: Flesch–Kincaid Function #1105

Open MarketingPip opened 2 months ago

MarketingPip commented 2 months ago

I was just doing some reading and thought this might be a cool feature to add to Compromise. If you're not familiar with it, see: here

I could possibly implement this! Just wanting to hear thoughts.

spencermountain commented 2 months ago

hey cool! ya this would be a neat plugin. There seem to be a couple of implementations that would be suitable, or it would be a fun one to roll on your own. Let me know if I can help.

thegoatherder commented 2 months ago

@MarketingPip this would be very handy! Let me know if you want some input.

MarketingPip commented 2 months ago

@spencermountain & @thegoatherder - could you both maybe take a peek at this and see if anything is wrong? (I'm not sure whether Compromise is catching all syllables...) Link to the Wikipedia page: here

Tho would be dope if maybe you could write some tests for this @thegoatherder - so Spencer is happy! lol

import nlp from "https://esm.sh/compromise";
import syllables from "https://esm.sh/compromise-speech";

nlp.extend(syllables); // Needed for syllable counting

/**
 * Computes the Flesch Reading Ease score and the Flesch-Kincaid Grade Level
 * of a text, using compromise for sentence/word segmentation and the
 * compromise-speech plugin for syllable splitting.
 *
 * @param {string} text - The text to analyse.
 * @returns {{fleschReadingEase: (string|null), fleschKincaidGrade: (string|null), readableScore: (Object|null)}}
 *   Both scores are strings rounded to 2 decimal places; `readableScore` is
 *   the descriptive band returned by `getScoreReadable` for the reading-ease
 *   score. All three fields are `null` when the text contains no sentences or
 *   no words, because both formulas would divide by zero.
 */
function calculateFleschScores(text) {
  const doc = nlp(text);

  // Getting total sentences
  const totalSentences = doc.sentences().out("array").length;

  // Getting total words - using .terms() to count words.
  const terms = doc.terms();
  const totalWords = terms.out("array").length;

  // Both formulas divide by these counts; without this guard an empty text
  // would produce the string "NaN" for each score.
  if (totalSentences === 0 || totalWords === 0) {
    return {
      fleschReadingEase: null,
      fleschKincaidGrade: null,
      readableScore: null
    };
  }

  // Getting total syllables, the compromise-speech plugin helps here.
  let totalSyllables = 0;
  terms.forEach((term) => {
    // NOTE(review): the plugin appears to return no entry for a term it
    // cannot split into syllables - confirm. Such terms count as zero
    // syllables instead of throwing on `undefined.length`.
    const termSyllables = term.syllables()[0];
    if (termSyllables) {
      totalSyllables += termSyllables.length;
    }
  });

  const wordsPerSentence = totalWords / totalSentences;
  const syllablesPerWord = totalSyllables / totalWords;

  // Calculating the Flesch Reading Ease Score
  const fleschReadingEase = (
    206.835 -
    1.015 * wordsPerSentence -
    84.6 * syllablesPerWord
  ).toFixed(2); // rounding to 2 decimal places for readability

  // Calculating the Flesch-Kincaid Grade Level
  const fleschKincaidGrade = (
    0.39 * wordsPerSentence +
    11.8 * syllablesPerWord -
    15.59
  ).toFixed(2); // rounding to 2 decimal places for readability

  return {
    fleschReadingEase,
    fleschKincaidGrade,
    readableScore: getScoreReadable(fleschReadingEase)
  };
}

// Quick manual check: score a short sample sentence and print both metrics.
const text = "The cat sat on the mat";
const { fleschReadingEase, fleschKincaidGrade, readableScore } =
  calculateFleschScores(text);

// The reading-ease line also prints the descriptive band object (or null).
console.log("Flesch Reading Ease Score:", fleschReadingEase, readableScore);
console.log("Flesch-Kincaid Grade Level:", fleschKincaidGrade);

/**
 * Maps a Flesch Reading Ease score to its descriptive band.
 *
 * The score is parsed with `parseFloat` first and only then clamped to
 * [0, 100]. Parsing first means non-numeric input (`null`, `""`, `"abc"`,
 * `NaN`, ...) yields `null` instead of being coerced to 0 and reported as
 * the "Professional" band.
 *
 * @param {number|string} score - Reading-ease score, as a number or a numeric
 *   string (e.g. the output of `toFixed`).
 * @returns {{range: string, school_level: string, notes: string}|null}
 *   The matching band, or `null` when the score cannot be parsed as a number.
 */
function getScoreReadable(score) {
  // Kept inside the function: the declaration is hoisted, so callers that
  // run earlier in the file can use it, which a module-level const would not allow.
  const scoreRanges = [
    {
      range: "100.00–90.00",
      school_level: "5th grade",
      notes:
        "Very easy to read. Easily understood by an average 11-year-old student."
    },
    {
      range: "90.0–80.0",
      school_level: "6th grade",
      notes: "Easy to read. Conversational English for consumers."
    },
    {
      range: "80.0–70.0",
      school_level: "7th grade",
      notes: "Fairly easy to read."
    },
    {
      range: "70.0–60.0",
      school_level: "8th & 9th grade",
      notes: "Plain English. Easily understood by 13- to 15-year-old students."
    },
    {
      range: "60.0–50.0",
      school_level: "10th to 12th grade",
      notes: "Fairly difficult to read."
    },
    {
      range: "50.0–30.0",
      school_level: "College",
      notes: "Difficult to read."
    },
    {
      range: "30.0–10.0",
      school_level: "College graduate",
      notes: "Very difficult to read. Best understood by university graduates."
    },
    {
      range: "10.0–0.0",
      school_level: "Professional",
      notes:
        "Extremely difficult to read. Best understood by university graduates."
    }
  ];

  const parsed = parseFloat(score);
  if (Number.isNaN(parsed)) {
    return null;
  }

  // The formula can produce values outside 0-100; pin them to the end bands.
  const clamped = Math.min(100, Math.max(0, parsed));

  for (const band of scoreRanges) {
    // Range labels are written "high–low" (en dash), both ends inclusive.
    // A score on a shared boundary resolves to the first (easier) band.
    const [high, low] = band.range.split("–").map((part) => parseFloat(part));
    if (clamped <= high && clamped >= low) {
      return band;
    }
  }
  return null;
}

Edit: possibly we could expand this into a "readability" plugin, with various test methods (not just these two)...

MarketingPip commented 1 month ago

@thegoatherder - hey Adam, any chance you have interest in helping with this?

thegoatherder commented 1 month ago

@MarketingPip sorry, I’m on vacation for the next couple of weeks and a bit allergic to my laptop. Will see if I can find some time on return.

Testing approach would be to gather some texts with known scores from online sources and plug them in and expect the known score.

MarketingPip commented 1 month ago

> @MarketingPip sorry I’m on vacations for the next couple of weeks and a bit allergic to my laptop. Will see if I can find some time on return.
>
> Testing approach would be to gather some texts with known scores from online sources and plug them in and expect the known score.

Problem is I have noticed different values from tools online lol!

Enjoy your vacation in the meantime! Would be dope to get some input / help on this - to make sure there are no missed corners on either end! :D