pylablanche / gcForest

Python implementation of deep forest method : gcForest
MIT License
417 stars 193 forks source link

ValueError: Input contains NaN, infinity or a value too large for dtype('float64'). #27

Open Jyothif opened 4 years ago

Jyothif commented 4 years ago

def word_vector(tokens, size):
    """Return the mean embedding of ``tokens`` as a ``(1, size)`` array.

    Each token is looked up in the module-level ``model_w2v``; tokens whose
    lookup raises ``KeyError`` (not in the vocabulary) are skipped.

    Args:
        tokens: Iterable of tokens to average.
        size: Length of each embedding vector.

    Returns:
        A ``(1, size)`` float array with the mean of the vectors that were
        found, or all zeros when ``tokens`` is empty or none were found.
    """
    vec = np.zeros((1, size))
    count = 0
    for word in tokens:
        try:
            vec += model_w2v[word].reshape((1, size))
        except KeyError:
            # Token is not in the vocabulary.
            continue
        count += 1

    # Only divide when at least one vector was accumulated (avoids 0/0).
    if count:
        vec /= count

    # Return unconditionally. When the return lived inside the `if`, input
    # with no in-vocabulary token yielded None instead of a zero vector.
    return vec

# Build the feature matrix: one row per entry of tokenized_tweet, 200 columns.
wordvec_arrays = np.zeros((len(tokenized_tweet), 200))

# enumerate() walks the entries positionally, so row i always matches the
# i-th entry even if tokenized_tweet is a pandas Series whose index labels
# are not 0..n-1 (where tokenized_tweet[i] would be a label lookup).
for i, tokens in enumerate(tokenized_tweet):
    wordvec_arrays[i, :] = word_vector(tokens, 200)

wordvec_df = pd.DataFrame(wordvec_arrays)
wordvec_df.shape

# Split the feature rows at position 31962.
# NOTE(review): 31962 is presumably the number of training rows - confirm
# against the data-loading code.
train_w2v = wordvec_df.iloc[:31962, :]
test_w2v = wordvec_df.iloc[31962:, :]

# Pick the training/validation feature rows using the indexes of the label
# objects.
# NOTE(review): iloc is positional, so this assumes the index labels of
# ytrain/yvalid equal row positions in train_w2v - verify.
xtrain_w2v = train_w2v.iloc[ytrain.index, :]
xvalid_w2v = train_w2v.iloc[yvalid.index, :]

# Fit the model on the training features and labels.
lreg.fit(xtrain_w2v, ytrain)

# Per-class probabilities for the validation rows.
prediction = lreg.predict_proba(xvalid_w2v)

# Mark a row as positive when the probability in column 1 is at least 0.3.
prediction_int = prediction[:, 1] >= 0.3

# Use the built-in int: the np.int alias was deprecated in NumPy 1.20 and
# removed in 1.24, where astype(np.int) raises AttributeError.
prediction_int = prediction_int.astype(int)

f1_score(yvalid, prediction_int)