stephbuon / democracy-lab

Code, manuals, and concepts for Democracy Lab research and affiliate projects.
MIT License
0 stars 0 forks source link

Fix this type error #98

Status: Closed — stephbuon closed this issue 3 years ago

stephbuon commented 3 years ago
---------------------------------------------------------------------------
RemoteTraceback                           Traceback (most recent call last)
RemoteTraceback: 
"""
Traceback (most recent call last):
  File "/hpc/applications/anaconda/3/lib/python3.6/multiprocessing/pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "/hpc/applications/anaconda/3/lib/python3.6/multiprocessing/pool.py", line 44, in mapstar
    return list(map(*args))
  File "<ipython-input-12-ea958b28fd64>", line 11, in str_split_df_sentences
    df['speech'] = df['speech'].apply(lambda x: re.split(split_rule, x))
  File "/hpc/applications/anaconda/3/lib/python3.6/site-packages/pandas/core/series.py", line 4045, in apply
    mapped = lib.map_infer(values, f, convert=convert_dtype)
  File "pandas/_libs/lib.pyx", line 2228, in pandas._libs.lib.map_infer
  File "<ipython-input-12-ea958b28fd64>", line 11, in <lambda>
    df['speech'] = df['speech'].apply(lambda x: re.split(split_rule, x))
  File "/hpc/applications/anaconda/3/lib/python3.6/re.py", line 212, in split
    return _compile(pattern, flags).split(string, maxsplit)
TypeError: expected string or bytes-like object
"""

The above exception was the direct cause of the following exception:

TypeError                                 Traceback (most recent call last)
<ipython-input-16-db18acefc095> in <module>
     23     #interval_subset(hansard, 'year', 1800, 2010, 5, fname=data_name) # turn this into a variable I can just name once
     24     print('Exporting models to ' + data_name + '_subsets...')
---> 25     w2v_export_gensim_models(data_name +'_subsets/', n_cores=24)

<ipython-input-13-f2bf6d5cf0c2> in w2v_export_gensim_models(dir_path, n_cores)
     32                 imported_data = pd.read_csv(dir_path + fname, encoding = 'ISO-8859-1', engine='c', error_bad_lines = False)
     33 
---> 34             sentences_df = parallelize_operation(imported_data, str_split_df_sentences, n_cores)
     35             sentences_df = parallelize_operation(sentences_df, lemmatize_df_text, n_cores)
     36 

<ipython-input-10-fe72a9328ece> in parallelize_operation(df, function, n_cores)
      7     split_df = np.array_split(df, n_cores)
      8     pool = Pool(n_cores)
----> 9     df = pd.concat(pool.map(function, split_df))
     10     pool.close()
     11     pool.join()

/hpc/applications/anaconda/3/lib/python3.6/multiprocessing/pool.py in map(self, func, iterable, chunksize)
    286         in a list that is returned.
    287         '''
--> 288         return self._map_async(func, iterable, mapstar, chunksize).get()
    289 
    290     def starmap(self, func, iterable, chunksize=None):

/hpc/applications/anaconda/3/lib/python3.6/multiprocessing/pool.py in get(self, timeout)
    668             return self._value
    669         else:
--> 670             raise self._value
    671 
    672     def _set(self, i, obj):

TypeError: expected string or bytes-like object