Running with the default debug.sh configuration (but on Python 3.10), I'm seeing the below error:
[2024-05-31 03:26:59,308] p5078 {main.py:107} ERROR - Failed to execute 1_generate_data.ipynb:
---------------------------------------------------------------------------
Exception encountered at "In [10]":
---------------------------------------------------------------------------
Error Traceback (most recent call last)
Cell In[10], line 3
1 # Convert DataFrame to a CSV format string
2 csv_buffer = io.StringIO()
----> 3 df.to_csv(csv_buffer, index=False)
4 csv_data = csv_buffer.getvalue()
5 all_prompts_file = config['dir_paths']['all_prompts_file']
File ~/.cache/pypoetry/virtualenvs/fmbench-XBAYeWJo-py3.10/lib/python3.10/site-packages/pandas/core/generic.py:3902, in NDFrame.to_csv(self, path_or_buf, sep, na_rep, float_format, columns, header, index, index_label, mode, encoding, compression, quoting, quotechar, lineterminator, chunksize, date_format, doublequote, escapechar, decimal, errors, storage_options)
3891 df = self if isinstance(self, ABCDataFrame) else self.to_frame()
3893 formatter = DataFrameFormatter(
3894 frame=df,
3895 header=header,
(...)
3899 decimal=decimal,
3900 )
-> 3902 return DataFrameRenderer(formatter).to_csv(
3903 path_or_buf,
3904 lineterminator=lineterminator,
3905 sep=sep,
3906 encoding=encoding,
3907 errors=errors,
3908 compression=compression,
3909 quoting=quoting,
3910 columns=columns,
3911 index_label=index_label,
3912 mode=mode,
3913 chunksize=chunksize,
3914 quotechar=quotechar,
3915 date_format=date_format,
3916 doublequote=doublequote,
3917 escapechar=escapechar,
3918 storage_options=storage_options,
3919 )
File ~/.cache/pypoetry/virtualenvs/fmbench-XBAYeWJo-py3.10/lib/python3.10/site-packages/pandas/io/formats/format.py:1152, in DataFrameRenderer.to_csv(self, path_or_buf, encoding, sep, columns, index_label, mode, compression, quoting, quotechar, lineterminator, chunksize, date_format, doublequote, escapechar, errors, storage_options)
1131 created_buffer = False
1133 csv_formatter = CSVFormatter(
1134 path_or_buf=path_or_buf,
1135 lineterminator=lineterminator,
(...)
1150 formatter=self.fmt,
1151 )
-> 1152 csv_formatter.save()
1154 if created_buffer:
1155 assert isinstance(path_or_buf, StringIO)
File ~/.cache/pypoetry/virtualenvs/fmbench-XBAYeWJo-py3.10/lib/python3.10/site-packages/pandas/io/formats/csvs.py:266, in CSVFormatter.save(self)
247 with get_handle(
248 self.filepath_or_buffer,
249 self.mode,
(...)
254 ) as handles:
255 # Note: self.encoding is irrelevant here
256 self.writer = csvlib.writer(
257 handles.handle,
258 lineterminator=self.lineterminator,
(...)
263 quotechar=self.quotechar,
264 )
--> 266 self._save()
File ~/.cache/pypoetry/virtualenvs/fmbench-XBAYeWJo-py3.10/lib/python3.10/site-packages/pandas/io/formats/csvs.py:271, in CSVFormatter._save(self)
269 if self._need_to_save_header:
270 self._save_header()
--> 271 self._save_body()
File ~/.cache/pypoetry/virtualenvs/fmbench-XBAYeWJo-py3.10/lib/python3.10/site-packages/pandas/io/formats/csvs.py:309, in CSVFormatter._save_body(self)
307 if start_i >= end_i:
308 break
--> 309 self._save_chunk(start_i, end_i)
File ~/.cache/pypoetry/virtualenvs/fmbench-XBAYeWJo-py3.10/lib/python3.10/site-packages/pandas/io/formats/csvs.py:320, in CSVFormatter._save_chunk(self, start_i, end_i)
317 data = [res.iget_values(i) for i in range(len(res.items))]
319 ix = self.data_index[slicer]._format_native_types(**self._number_format)
--> 320 libwriters.write_csv_rows(
321 data,
322 ix,
323 self.nlevels,
324 self.cols,
325 self.writer,
326 )
File writers.pyx:72, in pandas._libs.writers.write_csv_rows()
Error: need to escape, but no escapechar set
I believe it can be resolved by setting e.g. `df.to_csv(csv_buffer, index=False, escapechar="\\")`, but it seems weird that other people wouldn't have encountered this already? Presumably it's pretty normal for generated prompts to contain commas and/or quote marks... From the Pandas `DataFrame.to_csv` doc, it seems like this `escapechar` option has existed since at least v1 and has always defaulted to `None`, so the error is unlikely to have been introduced by a dependency upgrade or similar :/
Running with the default `debug.sh` configuration (but on Python 3.10), I'm seeing the below error:

I believe it can be resolved by setting e.g. `df.to_csv(csv_buffer, index=False, escapechar="\\")`, but it seems weird that other people wouldn't have encountered this already? Presumably it's pretty normal for generated prompts to contain commas and/or quote marks... From the Pandas `DataFrame.to_csv` doc, it seems like this `escapechar` option has existed since at least v1 and has always defaulted to `None`, so the error is unlikely to have been introduced by a dependency upgrade or similar :/