I'm trying to run the summarizer on a dataframe where each row contains a body of text: the model summarizes the text, saves the output, and moves on to the next paragraph.
However, I keep getting the following ValueError:
ValueError: Found array with 0 sample(s) (shape=(0, 1024)) while a minimum of 1 is required.
I'm passing a series of strings of text to the summarizer (one at a time), not arrays.
Also, I'm positive that every summary being passed in is long enough.
Could there be a bug related to array size?
Traceback (most recent call last)
in
5 summaries.append(paragraph)
6 else:
----> 7 summary = model(paragraph, ratio=0.5)
8 summaries.append(summary)
~/.conda/envs/hp_summarizer/lib/python3.7/site-packages/summarizer/model_processors.py in __call__(self, body, ratio, min_length, max_length, use_first, algorithm)
40 def __call__(self, body: str, ratio: float=0.2, min_length: int=40, max_length: int=600,
41 use_first: bool=True, algorithm='kmeans') -> str:
---> 42 return self.run(body, ratio, min_length, max_length)
43
44
~/.conda/envs/hp_summarizer/lib/python3.7/site-packages/summarizer/model_processors.py in run(self, body, ratio, min_length, max_length, use_first, algorithm)
35 use_first: bool=True, algorithm='kmeans') -> str:
36 sentences = self.process_content_sentences(body, min_length, max_length)
---> 37 res = self.run_clusters(sentences, ratio, algorithm, use_first)
38 return ' '.join(res)
39
~/.conda/envs/hp_summarizer/lib/python3.7/site-packages/summarizer/model_processors.py in run_clusters(self, content, ratio, algorithm, use_first)
54 def run_clusters(self, content: List[str], ratio=0.2, algorithm='kmeans', use_first: bool= True) -> List[str]:
55 hidden = self.model(content, self.hidden, self.reduce_option)
---> 56 hidden_args = ClusterFeatures(hidden, algorithm).cluster(ratio)
57 if use_first:
58 if hidden_args[0] != 0:
~/.conda/envs/hp_summarizer/lib/python3.7/site-packages/summarizer/ClusterFeatures.py in cluster(self, ratio)
46 def cluster(self, ratio: float=0.1) -> List[int]:
47 k = 1 if ratio * len(self.features) < 1 else int(len(self.features) * ratio)
---> 48 model = self.__get_model(k).fit(self.features)
49 centroids = self.__get_centroids(model)
50 cluster_args = self.__find_closest_args(centroids)
~/.conda/envs/hp_summarizer/lib/python3.7/site-packages/sklearn/cluster/k_means_.py in fit(self, X, y, sample_weight)
967 tol=self.tol, random_state=random_state, copy_x=self.copy_x,
968 n_jobs=self.n_jobs, algorithm=self.algorithm,
--> 969 return_n_iter=True)
970 return self
971
~/.conda/envs/hp_summarizer/lib/python3.7/site-packages/sklearn/cluster/k_means_.py in k_means(X, n_clusters, sample_weight, init, precompute_distances, n_init, max_iter, verbose, tol, random_state, copy_x, n_jobs, algorithm, return_n_iter)
307 order = "C" if copy_x else None
308 X = check_array(X, accept_sparse='csr', dtype=[np.float64, np.float32],
--> 309 order=order, copy=copy_x)
310 # verify that the number of samples given is larger than k
311 if _num_samples(X) < n_clusters:
~/.conda/envs/hp_summarizer/lib/python3.7/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
548 " minimum of %d is required%s."
549 % (n_samples, array.shape, ensure_min_samples,
--> 550 context))
551
552 if ensure_min_features > 0 and array.ndim == 2:
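This happens when the `min_length` and `max_length` filters (defaults `min_length=40`, `max_length=600`) remove every sentence from the input, so there is nothing left to summarize and the clustering step receives an empty array (hence `shape=(0, 1024)`). Closing as stale for now, but let me know if any issues arise.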
I'm trying to run the summarizer on a dataframe where each row contains a body of text: the model summarizes the text, saves the output, and moves on to the next paragraph.
However, I keep getting the following ValueError: ValueError: Found array with 0 sample(s) (shape=(0, 1024)) while a minimum of 1 is required.
I'm passing a series of strings of text to the summarizer (one at a time), not arrays.
Also, I'm positive that every summary being passed in is long enough.
Could there be a bug related to array size?
Traceback (most recent call last)