mljs / ml

Machine learning tools in JavaScript
MIT License
2.52k stars 208 forks source link

Random Forest Regression error 'input must not be empty' when fed text frequency array training set #98

Open shahrin014 opened 6 years ago

shahrin014 commented 6 years ago

Hi there.

My use case is to take a movie's genres and predict the rating that it would be given. Since genres are discrete values, I considered using Naive Bayes. However, since I need to predict the movie's rating, I read that Random Forest can get me the desired result.

I have the following training set, which consists of arrays of inverse document frequencies. var genreList = ["Biography","Drama","History","Documentary","Action","Comedy","Thriller","Crime","Music","Family","Fantasy","Musical","Animation","Adventure","Sport","Horror","Mystery","Sci-Fi"]

var trainingset = [ [0.1111111111111111,0.05555555555555555,0.2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0.125,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0.1111111111111111,0,0,0,0.041666666666666664,0.041666666666666664,0,0,0,0,0,0,0,0,0,0,0,0],[0.1111111111111111,0.05555555555555555,0,0,0,0,0.25,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0.041666666666666664,0,0,0.1,1,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0.041666666666666664,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,0.3333333333333333,0.25,1,0,0,0,0,0,0],[0,0,0,0,0.041666666666666664,0,0,0,0,0,0,0,0.07692307692307693,0.034482758620689655,0,0,0,0],[0,0.05555555555555555,0,0.125,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0.041666666666666664,0,0,0,0,0,0,0.07692307692307693,0.034482758620689655,0,0,0,0],[0,0,0,0,0.041666666666666664,0.041666666666666664,0,0.1,0,0,0,0,0,0,0,0,0,0],[0,0.05555555555555555,0,0,0,0,0,0,0,0.3333333333333333,0.25,0,0,0,0,0,0,0],[0,0,0,0,0,0.041666666666666664,0,0,0,0,0,0,0.07692307692307693,0.034482758620689655,0,0,0,0],[0.1111111111111111,0.05555555555555555,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0],[0,0,0,0,0.041666666666666664,0.041666666666666664,0,0,0,0,0,0,0,0.034482758620689655,0,0,0,0],[0,0.05555555555555555,0,0,0,0.041666666666666664,0,0.1,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0.041666666666666664,0,0,0,0,0,0.25,0,0,0.034482758620689655,0,0,0,0],[0,0.05555555555555555,0,0,0,0.041666666666666664,0,0.1,0,0,0,0,0,0,0,0,0,0],[0,0,0,0.125,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0.1111111111111111,0.05555555555555555,0,0,0,0.041666666666666664,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0.041666666666666664,0,0,0,0,0,0,0.07692307692307693,0.034482758620689655,0,0,0,0],[0,0,0,0,0,0,0.25,0,0,0,0,0,0,0,0,0.5,0.3333333333333333,0],[0,0.05555555555555555,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0.1111111111111111,0.05555555555555555,0.2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0.125,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0.041666666666666664,0,0,0,0,0,0,0.07692307692307693,0.034482758620689655,0,0,0,0],[0,0,0,0,0.041666666666666664,0,0,0,0,0,0,0,0,0.034482758620689655,0
,0,0,0.07692307692307693],[0,0,0,0,0.041666666666666664,0,0,0,0,0,0,0,0,0.034482758620689655,0,0,0,0.07692307692307693],[0,0,0,0,0.041666666666666664,0,0,0,0,0,0,0,0,0.034482758620689655,0,0,0,0.07692307692307693],[0,0,0,0,0,0,0.25,0,0,0,0,0,0,0.034482758620689655,0,0,0,0.07692307692307693],[0,0,0,0,0.041666666666666664,0,0,0,0,0,0,0,0,0.034482758620689655,0,0,0,0.07692307692307693],[0,0,0,0,0.041666666666666664,0.041666666666666664,0,0,0,0,0,0,0,0.034482758620689655,0,0,0,0],[0,0,0,0,0.041666666666666664,0.041666666666666664,0,0,0,0,0,0,0,0.034482758620689655,0,0,0,0],[0,0,0,0,0,0.041666666666666664,0,0,0,0,0,0,0.07692307692307693,0.034482758620689655,0,0,0,0],[0,0,0,0,0,0.041666666666666664,0,0,0,0,0,0,0.07692307692307693,0.034482758620689655,0,0,0,0],[0,0,0,0,0,0.041666666666666664,0,0,0,0,0,0,0,0,0,0,0,0],[0,0.05555555555555555,0,0,0,0,0,0.1,0,0,0,0,0,0,0,0,0.3333333333333333,0],[0,0,0,0,0.041666666666666664,0.041666666666666664,0,0.1,0,0,0,0,0,0,0,0,0,0],[0,0.05555555555555555,0,0,0,0,0,0.1,0,0,0,0,0,0,0,0,0.3333333333333333,0],[0,0,0,0,0.041666666666666664,0,0,0,0,0,0,0,0,0.034482758620689655,0,0,0,0.07692307692307693],[0,0,0,0,0.041666666666666664,0,0,0,0,0,0,0,0,0.034482758620689655,0,0,0,0.07692307692307693],[0,0,0,0,0.041666666666666664,0,0,0,0,0,0,0,0,0.034482758620689655,0,0,0,0.07692307692307693],[0,0.05555555555555555,0.2,0,0,0,0,0.1,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0.041666666666666664,0,0,0,0,0,0,0.07692307692307693,0.034482758620689655,0,0,0,0],[0,0,0,0,0.041666666666666664,0,0,0,0,0,0.25,0,0,0.034482758620689655,0,0,0,0],[0.1111111111111111,0.05555555555555555,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0.041666666666666664,0,0,0,0,0,0,0.07692307692307693,0.034482758620689655,0,0,0,0],[0,0,0,0,0.041666666666666664,0,0,0,0,0,0,0,0,0.034482758620689655,0,0,0,0.07692307692307693],[0,0,0,0,0.041666666666666664,0,0,0,0,0,0,0,0,0.034482758620689655,0,0,0,0.07692307692307693],[0,0,0,0,0,0.041666666666666664,0,0,0,0.3333333333333333,0,0,0.07692307692307693,
0,0,0,0,0],[0,0,0,0.125,0,0,0,0.1,0,0,0,0,0,0,0,0,0,0],[0,0.05555555555555555,0,0,0,0,0.25,0,0,0,0,0,0,0,0,0.5,0,0],[0,0,0.2,0.125,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0.1,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0.041666666666666664,0,0,0,0,0,0,0,0.07692307692307693,0.034482758620689655,0,0,0,0],[0,0.05555555555555555,0,0,0,0.041666666666666664,0,0,0,0,0,0,0,0,0,0,0,0],[0.1111111111111111,0.05555555555555555,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0.1111111111111111,0,0,0.125,0,0,0,0,0,0,0,0,0,0.034482758620689655,0,0,0,0],[0,0,0,0,0.041666666666666664,0,0,0,0,0,0,0,0,0,0,0,0,0.07692307692307693],[0,0,0,0,0.041666666666666664,0,0,0,0,0,0,0,0,0,0,0,0,0.07692307692307693],[0,0,0,0,0.041666666666666664,0,0,0,0,0,0,0,0,0,0,0,0,0.07692307692307693],[0,0,0,0.125,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0.05555555555555555,0.2,0,0,0,0,0,0,0,0,0,0,0.034482758620689655,0,0,0,0],[0,0,0,0,0.041666666666666664,0.041666666666666664,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0.041666666666666664,0,0,0,0,0,0,0.07692307692307693,0.034482758620689655,0,0,0,0],[0,0,0,0,0,0.041666666666666664,0,0,0,0,0,0,0.07692307692307693,0.034482758620689655,0,0,0,0] ] var predictions = [7,10,8,9,7,3,7,7,10,7,5,6,7,9,8,7,7,7,9,8,7,6,8,8,10,8,7,5,5,8,6,5,6,8,8,2,6,8,7,6,6,5,9,6,6,10,7,7,6,6,10,8,9,7,8,6,8,9,9,7,6,9,7,6,7,7] However I get the following console error: Error: input must not be empty at mean (index.js:12) at squaredError (utils.js:82) at Object.regressionError [as regression] (utils.js:106) at TreeNode.bestSplit (TreeNode.js:57) at TreeNode.train (TreeNode.js:157) at DecisionTreeRegression.train (DecisionTreeRegression.js:43) at RandomForestRegression.train (RandomForestBase.js:95) at Object. (VJrxxZeJeWDr:131) at Object.invoke (angular.js:5040) at $controllerInit (angular.js:11000)

jshoyer42 commented 3 years ago

Was this ever solved? I'm having the same issue. I looked in a different thread but couldn't make any of the solutions work within the node package.