I'm just wondering: did process_data.py create the files under .DS_Store? Otherwise, why is it looking for files under that path?
Deleting .DS_Store from partitioned_data/ might fix this. If it doesn't, can you look into partitioned_data/ and let us know what files and folders are there?
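For context, macOS drops a hidden .DS_Store file into any folder it has browsed, so a plain directory listing will pick it up next to the real partition folders. A minimal sketch of the cleanup being suggested, assuming the notebook is run from the repository root where partitioned_data/ lives (the recursive walk is just a precaution):

import os

# Walk partitioned_data/ and delete any stray .DS_Store files.
for root, dirs, files in os.walk("partitioned_data"):
    for name in files:
        if name == ".DS_Store":
            os.remove(os.path.join(root, name))
            print("removed", os.path.join(root, name))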
Deleting the .DS_Store doesn't help. My partitioned_data/ basically contains:
$ ls -a
. part_120 part_145 part_17 part_194 part_32 part_57 part_81
.. part_121 part_146 part_170 part_195 part_33 part_58 part_82
part_0 part_122 part_147 part_171 part_196 part_34 part_59 part_83
part_1 part_123 part_148 part_172 part_197 part_35 part_6 part_84
part_10 part_124 part_149 part_173 part_198 part_36 part_60 part_85
part_100 part_125 part_15 part_174 part_199 part_37 part_61 part_86
part_101 part_126 part_150 part_175 part_2 part_38 part_62 part_87
part_102 part_127 part_151 part_176 part_20 part_39 part_63 part_88
part_103 part_128 part_152 part_177 part_200 part_4 part_64 part_89
part_104 part_129 part_153 part_178 part_201 part_40 part_65 part_9
part_105 part_13 part_154 part_179 part_202 part_41 part_66 part_90
part_106 part_130 part_155 part_18 part_203 part_42 part_67 part_91
part_107 part_131 part_156 part_180 part_204 part_43 part_68 part_92
part_108 part_132 part_157 part_181 part_205 part_44 part_69 part_93
part_109 part_133 part_158 part_182 part_206 part_45 part_7 part_94
part_11 part_134 part_159 part_183 part_21 part_46 part_70 part_95
part_110 part_135 part_16 part_184 part_22 part_47 part_71 part_96
part_111 part_136 part_160 part_185 part_23 part_48 part_72 part_97
part_112 part_137 part_161 part_186 part_24 part_49 part_73 part_98
part_113 part_138 part_162 part_187 part_25 part_5 part_74 part_99
part_114 part_139 part_163 part_188 part_26 part_50 part_75
part_115 part_14 part_164 part_189 part_27 part_51 part_76
part_116 part_140 part_165 part_19 part_28 part_52 part_77
part_117 part_141 part_166 part_190 part_29 part_53 part_78
part_118 part_142 part_167 part_191 part_3 part_54 part_79
part_119 part_143 part_168 part_192 part_30 part_55 part_8
part_12 part_144 part_169 part_193 part_31 part_56 part_80
What is the error now?
The same error.
[ ] | 0% Completed | 0.1s
FileNotFoundError Traceback (most recent call last)
<ipython-input-18-427963b5bcd5> in <module>()
----> 1 fms_out = feature_matrices.compute()
2 X = pd.concat(fms_out)
~/miniconda3/lib/python3.6/site-packages/dask/base.py in compute(self, **kwargs)
154 dask.base.compute
155 """
--> 156 (result,) = compute(self, traverse=False, **kwargs)
157 return result
158
~/miniconda3/lib/python3.6/site-packages/dask/base.py in compute(*args, **kwargs)
393 keys = [x.__dask_keys__() for x in collections]
394 postcomputes = [x.__dask_postcompute__() for x in collections]
--> 395 results = schedule(dsk, keys, **kwargs)
396 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
397
~/miniconda3/lib/python3.6/site-packages/dask/multiprocessing.py in get(dsk, keys, num_workers, func_loads, func_dumps, optimize_graph, **kwargs)
170 get_id=_process_get_id, dumps=dumps, loads=loads,
171 pack_exception=pack_exception,
--> 172 raise_exception=reraise, **kwargs)
173 finally:
174 if cleanup:
~/miniconda3/lib/python3.6/site-packages/dask/local.py in get_async(apply_async, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, **kwargs)
499 _execute_task(task, data) # Re-execute locally
500 else:
--> 501 raise_exception(exc, tb)
502 res, worker_id = loads(res_info)
503 state['cache'][key] = res
~/miniconda3/lib/python3.6/site-packages/dask/compatibility.py in reraise(exc, tb)
109 def reraise(exc, tb=None):
110 if exc.__traceback__ is not tb:
--> 111 raise exc.with_traceback(tb)
112 raise exc
113
~/miniconda3/lib/python3.6/site-packages/dask/local.py in execute_task()
270 try:
271 task, data = loads(task_info)
--> 272 result = _execute_task(task, data)
273 id = get_id()
274 result = dumps((result, id))
~/miniconda3/lib/python3.6/site-packages/dask/local.py in _execute_task()
251 func, args = arg[0], arg[1:]
252 args2 = [_execute_task(a, cache) for a in args]
--> 253 return func(*args2)
254 elif not ishashable(arg):
255 return arg
~/miniconda3/lib/python3.6/site-packages/dask/bag/core.py in reify()
1547 def reify(seq):
1548 if isinstance(seq, Iterator):
-> 1549 seq = list(seq)
1550 if seq and isinstance(seq[0], Iterator):
1551 seq = list(map(list, seq))
~/miniconda3/lib/python3.6/site-packages/dask/bag/core.py in map_chunk()
1706 yield f(**k)
1707 else:
-> 1708 for a in zip(*args):
1709 yield f(*a)
1710
~/miniconda3/lib/python3.6/site-packages/dask/bag/core.py in map_chunk()
1706 yield f(**k)
1707 else:
-> 1708 for a in zip(*args):
1709 yield f(*a)
1710
~/miniconda3/lib/python3.6/site-packages/dask/bag/core.py in map_chunk()
1707 else:
1708 for a in zip(*args):
-> 1709 yield f(*a)
1710
1711 # Check that all iterators are fully exhausted
~/Py/Featuretools/predict-next-purchase/utils.py in load_entityset()
5
6 def load_entityset(data_dir):
----> 7 order_products = pd.read_csv(os.path.join(data_dir, "order_products__prior.csv"))
8 orders = pd.read_csv(os.path.join(data_dir, "orders.csv"))
9 departments = pd.read_csv(os.path.join(data_dir, "departments.csv"))
~/miniconda3/lib/python3.6/site-packages/pandas/io/parsers.py in parser_f()
676 skip_blank_lines=skip_blank_lines)
677
--> 678 return _read(filepath_or_buffer, kwds)
679
680 parser_f.__name__ = name
~/miniconda3/lib/python3.6/site-packages/pandas/io/parsers.py in _read()
438
439 # Create the parser.
--> 440 parser = TextFileReader(filepath_or_buffer, **kwds)
441
442 if chunksize or iterator:
~/miniconda3/lib/python3.6/site-packages/pandas/io/parsers.py in __init__()
785 self.options['has_index_names'] = kwds['has_index_names']
786
--> 787 self._make_engine(self.engine)
788
789 def close(self):
~/miniconda3/lib/python3.6/site-packages/pandas/io/parsers.py in _make_engine()
1012 def _make_engine(self, engine='c'):
1013 if engine == 'c':
-> 1014 self._engine = CParserWrapper(self.f, **self.options)
1015 else:
1016 if engine == 'python':
~/miniconda3/lib/python3.6/site-packages/pandas/io/parsers.py in __init__()
1706 kwds['usecols'] = self.usecols
1707
-> 1708 self._reader = parsers.TextReader(src, **kwds)
1709
1710 passed_names = self.names is None
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()
FileNotFoundError: File b'partitioned_data/.DS_Store/order_products__prior.csv' does not exist
Can you inspect the function load_entityset in utils.py and see where the extra .DS_Store is getting added to the file path?
Urm... I couldn't find any .DS_Store in the file, and the path is simply data_dir + "order_products__prior.csv".
Is .DS_Store part of the data_dir variable? If so, that means it's getting added by the function call in the notebook.
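A quick illustration of that reasoning, using a hypothetical data_dir value taken from the error message:

import os

data_dir = "partitioned_data/.DS_Store"  # hypothetical value, matching the traceback
print(os.path.join(data_dir, "order_products__prior.csv"))
# prints: partitioned_data/.DS_Store/order_products__prior.csv -- the exact path in the FileNotFoundError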
Can you look for the cell in the notebook that looks like this:
path = "partitioned_data/"
#_, dirnames, _ = os.walk(path).next()
dirnames = [os.path.join(path, d) for d in os.listdir(path)]
b = bag.from_sequence(dirnames)
entity_sets = b.map(utils.load_entityset)
Is .DS_Store in one of the dirnames?
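One way to check, and to keep stray files out of the bag in the first place, is to filter the listing for directories (a sketch, assuming the same path variable as in the cell above):

import os

path = "partitioned_data/"
# Keep only real directories, so files like .DS_Store never reach load_entityset.
dirnames = [os.path.join(path, d) for d in os.listdir(path)
            if os.path.isdir(os.path.join(path, d))]
print([d for d in dirnames if ".DS_Store" in d])  # should print an empty list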
Thanks for helping debug this!
I didn't find any .DS_Store in my variables, but after I deleted the .DS_Store file and re-ran the whole thing, that line runs now!
Thanks for the help! You can decide whether to close this issue now.
Great, I'll close for now. Let us know if you need any additional help!
While running Tutorial.ipynb on my local machine, I got a FileNotFoundError on fms_out = feature_matrices.compute() (the same traceback shown above).