nhunzaker / speakeasy

A simple natural language tool written for NodeJS
388 stars 39 forks source link

TypeError: string.match is not a function #11

Closed ElliotCambo closed 7 years ago

ElliotCambo commented 7 years ago

Just running the module on a string, I am getting this error. Any ideas?

TypeError: string.match is not a function at new LexerNode (web/node_modules/speakeasy-nlp/lib/classify/pos/lexer.js:32:31) at module.exports.Lexer.lex (web/node_modules/speakeasy-nlp/lib/classify/pos/lexer.js:89:14) at Object.classify (web/node_modules/speakeasy-nlp/lib/classify/index.js:86:21) at Object.processMessage (web/src/server/Logics/ProcessMessage.js:12:20) at web/src/server/Logics/SendMessage.js:69:28 at web/src/server/Models/UserModel.js:38:13 at model.Query. (web/node_modules/mongoose/lib/model.js:3731:16) at web/node_modules/kareem/index.js:277:21 at web/node_modules/kareem/index.js:131:16 at _combinedTickCallback (internal/process/next_tick.js:67:7) at process._tickCallback (internal/process/next_tick.js:98:9)

ElliotCambo commented 7 years ago

All good. I just ensured that the value the match function was being run on was actually a string,

by adding this on line 32 of speakeasy-nlp/lib/classify/pos/lexer.js: `string = string.toString();`

shakkirptb commented 6 years ago

Root cause: the "for in" loop. Resolution: use "for of" instead. Use the following lexer.js:

/*!
 * jsPOS
 *
 * Copyright 2010, Percy Wegmann
 * Licensed under the GNU LGPLv3 license
 * http://www.opensource.org/licenses/lgpl-3.0.html
 */

// Regular expressions the lexer uses to cut text into tokens.
var re = {
    // URL pattern, taken from:
    // http://daringfireball.net/2010/07/improved_regex_for_matching_urls
    url: /\b(?:(?:[a-z][\w-]+:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\))+(?:\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))/ig,
    // Digits with a fractional part (integer part optional), or a plain run of digits.
    number: /[0-9]*\.[0-9]+|[0-9]+/ig,
    // One or more consecutive whitespace characters.
    space: /\s+/ig,
    // Any single non-whitespace character. No `g` flag, so test() keeps no
    // lastIndex state between calls.
    unblank: /\S/,
    // A single '/', '.', ',', '?' or '!'.
    punctuation: /[\/\.\,\?\!]/ig,
    // Non-whitespace run, a literal dot, another non-whitespace run, then one
    // character that is neither '/' nor '?' (presumably meant for file names).
    file: /\S+\.\S+[^\/\?]/ig
};

/**
 * Lexer constructor; also assigned to `module.exports`.
 *
 * Stores the splitting patterns in the order they are applied:
 * URLs, file-like names, numbers, whitespace, and finally punctuation.
 */
var Lexer = module.exports = function (){
    var splitOrder = [
        re.url,
        re.file,
        re.number,
        re.space,
        re.punctuation
    ];
    this.regexs = splitOrder;
};

/**
 * One level of the recursive tokenizer tree.
 *
 * Splits the input with `regex`, keeping the separators that matched in
 * `this.matches` and the pieces between them in `this.children`. Every piece
 * is split again with the next pattern in `regexs`; once no patterns remain
 * the pieces are stored as plain strings.
 *
 * @param {*} string - Text to split. Strings are used as-is, null/undefined
 *     are treated as empty text, and any other value is converted with
 *     String() so it no longer fails with "string.match is not a function".
 * @param {RegExp} regex - Pattern (expected to be global) used at this level.
 * @param {RegExp[]} regexs - Remaining patterns for the deeper levels.
 */
function LexerNode(string, regex, regexs){
    var text;
    if (typeof string === 'string') {
        text = string;
    } else if (string === null || string === undefined) {
        text = '';
    } else {
        text = String(string);
    }

    this.string = text;
    this.children = [];
    this.matches = text ? text.match(regex) : null;

    var pieces;
    if (this.matches) {
        pieces = text.split(regex);
    } else {
        // Nothing matched (or empty text): the whole text is the only piece.
        this.matches = [];
        pieces = [text];
    }

    if (!regexs.length) {
        // no more regular expressions, we're done
        this.children = pieces;
        return;
    }

    // descend recursively
    var nextRegex = regexs[0];
    var nextRegexes = regexs.slice(1);
    for (var i = 0; i < pieces.length; i++) {
        this.children.push(new LexerNode(pieces[i], nextRegex, nextRegexes));
    }
}

/**
 * Appends this subtree's tokens to `array` in their original text order:
 * each child's tokens followed by the separator that came after that child.
 * Pieces and separators containing only whitespace are skipped.
 *
 * @param {Array} array - Output array; tokens are pushed onto it.
 */
LexerNode.prototype.fillArray = function(array){
    var children = this.children;
    var matches = this.matches;

    // The index must be a local: this method recurses, and a counter shared
    // between calls would be reset by the nested call and misplace separators.
    for (var index = 0; index < children.length; index++) {
        var child = children[index];

        if (child.fillArray) {
            child.fillArray(array);
        } else if (re.unblank.test(child)) {
            array.push(child);
        }

        if (index < matches.length) {
            var match = matches[index];
            if (re.unblank.test(match)) {
                array.push(match);
            }
        }
    }
};

/**
 * Returns the subtree's tokens joined by Array.prototype.toString
 * (comma-separated).
 *
 * @returns {string}
 */
LexerNode.prototype.toString = function(){
    var tokens = [];
    this.fillArray(tokens);
    return tokens.toString();
};

/**
 * Tokenizes `string`.
 *
 * Builds a LexerNode tree using the first pattern in `this.regexs` at the
 * root and the remaining patterns below it, then collects the tree's tokens.
 *
 * @param {string} string - Text to tokenize.
 * @returns {Array} Tokens collected by the root node's fillArray.
 */
Lexer.prototype.lex = function(string){
    var patterns = this.regexs;
    var tokens = [];
    var root = new LexerNode(string, patterns[0], patterns.slice(1));
    root.fillArray(tokens);
    return tokens;
};

//var lexer = new Lexer();
//print(lexer.lex("I made $5.60 today in 1 hour of work.  The E.M.T.'s were on time, but only barely.").toString());

Thank you for the amazing pos library :)