nlpaug word augmentation with the crop action fails with "ValueError: Sample larger than population or is negative".
ValueError Traceback (most recent call last)
in ()
7 print(augmented_text)
8 train_st_data_crop_aug = train_st_data
----> 9 train_st_data_crop_aug['sentence_aug'] = train_st_data_crop_aug.apply(lambda x: aug.augment(x['sentence']),axis=1) ## Delete a set of contunous word will be removed randomly¶
14 frames
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in apply(self, func, axis, raw, result_type, args, **kwds)
7550 kwds=kwds,
7551 )
-> 7552 return op.get_result()
7553
7554 def applymap(self, func) -> "DataFrame":
/usr/local/lib/python3.6/dist-packages/pandas/core/apply.py in get_result(self)
178 return self.apply_raw()
179
--> 180 return self.apply_standard()
181
182 def apply_empty_result(self):
/usr/local/lib/python3.6/dist-packages/pandas/core/apply.py in apply_standard(self)
269
270 def apply_standard(self):
--> 271 results, res_index = self.apply_series_generator()
272
273 # wrap results
/usr/local/lib/python3.6/dist-packages/pandas/core/apply.py in apply_series_generator(self)
298 for i, v in enumerate(series_gen):
299 # ignore SettingWithCopy here in case the user mutates
--> 300 results[i] = self.f(v)
301 if isinstance(results[i], ABCSeries):
302 # If we have a view on v, we need to make a copy because
in (x)
7 print(augmented_text)
8 train_st_data_crop_aug = train_st_data
----> 9 train_st_data_crop_aug['sentence_aug'] = train_st_data_crop_aug.apply(lambda x: aug.augment(x['sentence']),axis=1) ## Delete a set of contunous word will be removed randomly¶
/usr/local/lib/python3.6/dist-packages/nlpaug/base_augmenter.py in augment(self, data, n, num_thread)
113 # Single input with/without multiple input
114 else:
--> 115 augmented_results = self._parallel_augment(action_fx, clean_data, n=n, num_thread=num_thread)
116
117 if len(augmented_results) >= expected_output_num:
/usr/local/lib/python3.6/dist-packages/nlpaug/base_augmenter.py in _parallel_augment(cls, action_fx, data, n, num_thread)
174 def _parallel_augment(cls, action_fx, data, n, num_thread=2):
175 pool = ThreadPool(num_thread)
--> 176 results = pool.map(action_fx, [data] * n)
177 pool.close()
178 pool.join()
/usr/lib/python3.6/multiprocessing/pool.py in map(self, func, iterable, chunksize)
264 in a list that is returned.
265 '''
--> 266 return self._map_async(func, iterable, mapstar, chunksize).get()
267
268 def starmap(self, func, iterable, chunksize=None):
/usr/lib/python3.6/multiprocessing/pool.py in get(self, timeout)
642 return self._value
643 else:
--> 644 raise self._value
645
646 def _set(self, i, obj):
/usr/lib/python3.6/multiprocessing/pool.py in worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception)
117 job, i, func, args, kwds = task
118 try:
--> 119 result = (True, func(*args, **kwds))
120 except Exception as e:
121 if wrap_exception and func is not _helper_reraises_exception:
/usr/lib/python3.6/multiprocessing/pool.py in mapstar(args)
42
43 def mapstar(args):
---> 44 return list(map(*args))
45
46 def starmapstar(args):
/usr/local/lib/python3.6/dist-packages/nlpaug/augmenter/word/random.py in crop(self, data)
185 doc = Doc(data, self.tokenizer(data))
186
--> 187 aug_idxes = self._get_aug_range_idxes(doc.get_original_tokens())
188 aug_idxes.sort(reverse=True)
189
/usr/local/lib/python3.6/dist-packages/nlpaug/augmenter/word/word_augmenter.py in _get_aug_range_idxes(self, tokens)
105 word_idxes = [i for i, _ in enumerate(tokens[aug_cnt-1:])]
106
--> 107 start_aug_idx = self.sample(word_idxes, 1)[0]
108 aug_idxes = [start_aug_idx + _*direction for _ in range(aug_cnt)]
109
/usr/local/lib/python3.6/dist-packages/nlpaug/base_augmenter.py in sample(cls, x, num)
222 def sample(cls, x, num=None):
223 if isinstance(x, list):
--> 224 return random.sample(x, num)
225 elif isinstance(x, int):
226 return np.random.randint(1, x-1)
/usr/lib/python3.6/random.py in sample(self, population, k)
318 n = len(population)
319 if not 0 <= k <= n:
--> 320 raise ValueError("Sample larger than population or is negative")
321 result = [None] * k
322 setsize = 21 # size of a small set minus size of an empty list
ValueError: Sample larger than population or is negative
Hi,
nlpaug word augmentation with the crop action is failing with the error "ValueError: Sample larger than population or is negative". The full traceback follows:
ValueError Traceback (most recent call last)