retextjs / retext

natural language processor powered by plugins part of the @unifiedjs collective
https://unifiedjs.com
MIT License
2.36k stars 93 forks source link

processor.Parser is undefined #24

Closed mazing closed 9 years ago

mazing commented 9 years ago

If I use the example at https://github.com/wooorm/retext#usage, I get the error

C:\Users\Morten\node_modules\retext-emoji\index.js:133
    var proto = processor.Parser.prototype;
                                ^

TypeError: Cannot read property 'prototype' of undefined
    at emoji (C:\Users\Morten\node_modules\retext-emoji\index.js:133:33)
    at Retext.use (C:\Users\Morten\node_modules\retext\index.js:113:19)
    at Object.<anonymous> (C:\Users\Morten\Desktop\retext.js:8:6)
    at Module._compile (module.js:434:26)
    at Object.Module._extensions..js (module.js:452:10)
    at Module.load (module.js:355:32)
    at Function.Module._load (module.js:310:12)
    at Function.Module.runMain (module.js:475:10)
    at startup (node.js:117:18)
    at node.js:951:3

I have installed retext, retext-emoji and retext-smartypants and I'm using node v4.0.0.

wooorm commented 9 years ago

Sorry, I should put a note up. I’m in the process of updating retext, and all its plugins. Could you try installing with @next? npm install retext@next should do the trick?!

wooorm commented 9 years ago

I haven’t had time to update the docs yet, but you should be able to run the example as follows:

/* Load retext and the two plugins this example wires together. */
var Retext = require('retext');
var emoji = require('retext-emoji');
var smartypants = require('retext-smartypants');

/* Options handed to retext-emoji. */
var emojiOptions = {
    'convert' : 'encode'
};

/* The sample document: plain ASCII punctuation plus emoji short-codes. */
var input = [
    'The three wise monkeys [. . .] sometimes called the ',
    'three mystic apes--are a pictorial maxim. Together ',
    'they embody the proverbial principle to ("see no evil, ',
    'hear no evil, speak no evil"). The three monkeys are ',
    'Mizaru (:see_no_evil:), covering his eyes, who sees no ',
    'evil; Kikazaru (:hear_no_evil:), covering his ears, ',
    'who hears no evil; and Iwazaru (:speak_no_evil:), ',
    'covering his mouth, who speaks no evil.'
].join('');

/* Build a processor with retext-emoji first, then retext-smartypants. */
var retext = new Retext()
    .use(emoji, emojiOptions)
    .use(smartypants);

/* Run the processor over the sample document. */
retext.process(input, onProcessed);

/**
 * Completion callback for `retext.process`.
 *
 * Rethrows any error it is given; otherwise prints `doc`, the
 * transformed text. With the input above this logs:
 *
 *   The three wise monkeys […] sometimes called the three
 *   mystic apes—are a pictorial maxim. Together they
 *   embody the proverbial principle to (“see no evil,
 *   hear no evil, speak no evil”). The three monkeys are
 *   Mizaru (🙈), covering his eyes, who sees no evil;
 *   Kikazaru (🙉), covering his ears, who hears no evil;
 *   and Iwazaru (🙊), covering his mouth, who speaks no evil.
 */
function onProcessed(err, file, doc) {
    if (err) {
        throw err;
    }

    console.log(doc);
}
mazing commented 9 years ago

Oh okay. I'm just about to learn nodejs in order to use this amazing library :-)

Your example works! Thank you!

wooorm commented 9 years ago

Wow, I’m honoured :) Great to hear it’s working. I’ll leave this issue open until I update the readme.

mazing commented 9 years ago

Do you also have any plans for implementing https://en.wikipedia.org/wiki/Tf–idf?

wooorm commented 9 years ago

Never had the need, but I don’t doubt that someone could implement it. I’m actually not that familiar with it, but I could probably help you if you wanted to!

mazing commented 9 years ago

I will take a look at it when I become better at programming :-)

I guess it's something like

// build corpus
var document1 = "This text is about node.";
var document2 = "This text is about ruby.";
var document3 = "This text is about nothing.";
var corpus = [document1, document2, document3];

// some long text
var document = "This text is about node and ruby.";

// stopwords to filter out
var stopwords = ['it', 'is', 'am'];

// split into lowercase terms, then filter out short words and stopwords
// (lowercasing happens in tokenize() so the kept terms are actually lowercase)
var terms = tokenize(document).filter(function (token) {
  return token.length >= 2 && stopwords.indexOf(token) === -1;
});

// loop through remaining terms
for (var i = 0; i < terms.length; i++) {
  var term = terms[i];

  // compute tf-idf for term; the result must not be stored in a variable
  // named `tfidf`, because that would shadow/overwrite the function itself
  var score = tfidf(term, document, corpus);

  // print
  console.log(term + '(tfidf: ' + score + ')');
}

/**
 * Split text into lowercase word tokens.
 *
 * @param {string} text - Text to split on runs of non-word characters.
 * @returns {Array.<string>} Lowercased tokens, without empty strings.
 */
function tokenize(text) {
  return String(text).toLowerCase().split(/\W+/).filter(function (token) {
    // leading/trailing punctuation makes split() yield empty strings
    return token.length > 0;
  });
}

/**
 * Compute the tf-idf weight of a term, ignoring case.
 *
 * tf is the number of occurrences of `term` in `document`;
 * idf is log(N / df_t), where N is the number of documents in `corpus`
 * and df_t the number of corpus documents containing the term.
 *
 * @param {string} term - Word to score.
 * @param {string} document - Text in which occurrences are counted.
 * @param {Array.<string>} corpus - Texts used for the document frequency.
 * @returns {number} tf * idf, or 0 when no corpus document contains the
 *   term (idf would otherwise divide by zero).
 */
function tfidf(term, document, corpus) {
  var needle = String(term).toLowerCase();

  // compute term frequency (occurrences in text)
  var tf = tokenize(document).filter(function (token) {
    return token === needle;
  }).length;

  // compute inverse document frequency as idf=log(N/df_t)
  var numDocumentsInCorpus = corpus.length;
  var numDocumentsContainingTerm = corpus.filter(function (text) {
    return tokenize(text).indexOf(needle) !== -1;
  }).length;

  // avoid log(N / 0): a term unknown to the corpus gets weight 0
  if (numDocumentsContainingTerm === 0) {
    return 0;
  }

  var idf = Math.log(numDocumentsInCorpus / numDocumentsContainingTerm);

  // return tf-idf
  return tf * idf;
}

but I guess many of the needed functions are already implemented in retext.

wooorm commented 9 years ago

Yup, retext has real good support for finding “words” :)

wooorm commented 9 years ago

Closed by 08a095d9748e6d3b06aad82f32752ca6d39b3aef.