Closed stephbuon closed 3 years ago
---------------------------------------------------------------------------
RemoteTraceback                           Traceback (most recent call last)
RemoteTraceback:
"""
Traceback (most recent call last):
  File "/hpc/applications/anaconda/3/lib/python3.6/multiprocessing/pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "/hpc/applications/anaconda/3/lib/python3.6/multiprocessing/pool.py", line 44, in mapstar
    return list(map(*args))
  File "<ipython-input-12-ea958b28fd64>", line 11, in str_split_df_sentences
    df['speech'] = df['speech'].apply(lambda x: re.split(split_rule, x))
  File "/hpc/applications/anaconda/3/lib/python3.6/site-packages/pandas/core/series.py", line 4045, in apply
    mapped = lib.map_infer(values, f, convert=convert_dtype)
  File "pandas/_libs/lib.pyx", line 2228, in pandas._libs.lib.map_infer
  File "<ipython-input-12-ea958b28fd64>", line 11, in <lambda>
    df['speech'] = df['speech'].apply(lambda x: re.split(split_rule, x))
  File "/hpc/applications/anaconda/3/lib/python3.6/re.py", line 212, in split
    return _compile(pattern, flags).split(string, maxsplit)
TypeError: expected string or bytes-like object
"""

The above exception was the direct cause of the following exception:

TypeError                                 Traceback (most recent call last)
<ipython-input-16-db18acefc095> in <module>
     23 #interval_subset(hansard, 'year', 1800, 2010, 5, fname=data_name) # turn this into a variable I can just name once
     24 print('Exporting models to ' + data_name + '_subsets...')
---> 25 w2v_export_gensim_models(data_name +'_subsets/', n_cores=24)

<ipython-input-13-f2bf6d5cf0c2> in w2v_export_gensim_models(dir_path, n_cores)
     32 imported_data = pd.read_csv(dir_path + fname, encoding = 'ISO-8859-1', engine='c', error_bad_lines = False)
     33
---> 34 sentences_df = parallelize_operation(imported_data, str_split_df_sentences, n_cores)
     35 sentences_df = parallelize_operation(sentences_df, lemmatize_df_text, n_cores)
     36

<ipython-input-10-fe72a9328ece> in parallelize_operation(df, function, n_cores)
      7 split_df = np.array_split(df, n_cores)
      8 pool = Pool(n_cores)
----> 9 df = pd.concat(pool.map(function, split_df))
     10 pool.close()
     11 pool.join()

/hpc/applications/anaconda/3/lib/python3.6/multiprocessing/pool.py in map(self, func, iterable, chunksize)
    286 in a list that is returned.
    287 '''
--> 288 return self._map_async(func, iterable, mapstar, chunksize).get()
    289
    290 def starmap(self, func, iterable, chunksize=None):

/hpc/applications/anaconda/3/lib/python3.6/multiprocessing/pool.py in get(self, timeout)
    668 return self._value
    669 else:
--> 670 raise self._value
    671
    672 def _set(self, i, obj):

TypeError: expected string or bytes-like object