linnarsson-lab / FISHscale

Spatial analysis of FISH data

Index Error when loading dataset #29

Open nhuytan opened 2 months ago

nhuytan commented 2 months ago

I get an error when loading a dataset. Any suggestions on how to fix it?

IndexError: index 8389024 is out of bounds for axis 0 with size 66462
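For reference, the loading call looks roughly like this (reconstructed from the traceback below; the class name, import path, and data path are simplified placeholders — only the keyword arguments are actually visible in the traceback):

```python
# Rough reconstruction of the failing call; names and paths are placeholders.
from FISHscale.utils.dataset import MultiDataset  # assumed import path

ds = MultiDataset(
    'path/to/input/files',  # placeholder for the real data path
    verbose=True,
    exclude_genes=['Control1', 'Control2', 'Control3', 'Control4',
                   'Control5', 'Control6', 'Control7', 'Control8'],
    z=[-140, 600, 1200, 1810, 2420, 3000, 3600],
)
```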

{ "name": "IndexError", "message": "index 8389024 is out of bounds for axis 0 with size 66462", "stack": "--------------------------------------------------------------------------- IndexError Traceback (most recent call last) ~\AppData\Local\Temp\ipykernel_22712\2632409830.py in 13 verbose=True, 14 exclude_genes=['Control1', 'Control2', 'Control3', 'Control4', 'Control5','Control6', 'Control7', 'Control8'], ---> 15 z=[-140, 600, 1200, 1810, 2420, 3000, 3600]) 16 17 # Have olfactory bulb pointing left.

c:\users\anonymous\fishscale\FISHscale\utils\dataset.py in init(self, data, data_folder, unique_genes, MultiDataset_name, color_input, verbose, grid_layout, columns_layout, x_label, y_label, z_label, gene_label, other_columns, exclude_genes, z, pixel_size, x_offset, y_offset, z_offset, polygon, select_valid, reparse, parse_num_threads) 732 self.load_from_files(data, x_label, y_label, z_label, z, gene_label, other_columns, unique_genes, exclude_genes, 733 pixel_size, x_offset, y_offset, z_offset, polygon, select_valid, reparse, color_input, --> 734 parse_num_threads) 735 else: 736 raise Exception(f'Input for \"data\" not understood. Should be list with initiated Datasets or valid path to files.')

c:\users\anonymous\fishscale\FISHscale\utils\dataset.py in load_from_files(self, filepath, x_label, y_label, z_label, z, gene_label, other_columns, unique_genes, exclude_genes, pixel_size, x_offset, y_offset, z_offset, polygon, select_valid, reparse, color_input, num_threads) 920 part_of_multidataset=True) 921 lazy_result.append(lr) --> 922 futures = dask.persist(lazy_result, num_workers=1, num_threads = num_threads) 923 self.datasets = dask.compute(futures) 924 self.datasets_names = [d.dataset_name for d in self.datasets]

c:\Anaconda\envs\my_env\lib\site-packages\dask\base.py in persist(traverse, optimize_graph, scheduler, args, kwargs) 833 postpersists.append((rebuild, a_keys, state)) 834 --> 835 results = schedule(dsk, keys, kwargs) 836 d = dict(zip(keys, results)) 837 results2 = [r({k: d[k] for k in ks}, s) for r, ks, s in postpersists]

c:\Anaconda\envs\my_env\lib\site-packages\dask\threaded.py in get(dsk, result, cache, num_workers, pool, kwargs) 87 get_id=_thread_get_id, 88 pack_exception=pack_exception, ---> 89 kwargs, 90 ) 91

c:\Anaconda\envs\my_env\lib\site-packages\dask\local.py in get_async(submit, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, chunksize, **kwargs) 504 _execute_task(task, data) # Re-execute locally 505 else: --> 506 raise_exception(exc, tb) 507 res, worker_id = loads(res_info) 508 state[\"cache\"][key] = res

c:\Anaconda\envs\my_env\lib\site-packages\dask\local.py in reraise(exc, tb) 312 if exc.traceback is not tb: 313 raise exc.with_traceback(tb) --> 314 raise exc 315 316

c:\Anaconda\envs\my_env\lib\site-packages\dask\local.py in execute_task(key, task_info, dumps, loads, get_id, pack_exception) 217 try: 218 task, data = loads(task_info) --> 219 result = _execute_task(task, data) 220 id = get_id() 221 result = dumps((result, id))

c:\Anaconda\envs\my_env\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk) 117 # temporaries by their reference count and can execute certain 118 # operations in-place. --> 119 return func(*(_execute_task(a, cache) for a in args)) 120 elif not ishashable(arg): 121 return arg

c:\Anaconda\envs\my_env\lib\site-packages\dask\utils.py in apply(func, args, kwargs) 37 def apply(func, args, kwargs=None): 38 if kwargs: ---> 39 return func(*args, *kwargs) 40 else: 41 return func(args)

c:\users\anonymous\fishscale\FISHscale\utils\dataset.py in init(self, filename, x_label, y_label, z_label, z, gene_label, other_columns, unique_genes, exclude_genes, pixel_size, x_offset, y_offset, z_offset, polygon, select_valid, reparse, color_input, working_selection, verbose, part_of_multidataset, image) 206 self.load_data(self.filename, x_label, y_label, gene_label, self.other_columns, x_offset, y_offset, z_offset, 207 self.pixel_size.magnitude, unique_genes, exclude_genes, self.polygon, self.select_valid, --> 208 reparse, z_label) 209 210 #Gene metadata

c:\users\anonymous\fishscale\FISHscale\utils\data_handling.py in load_data(self, filename, x_label, y_label, gene_label, other_columns, x_offset, y_offset, z_offset, pixel_size, unique_genes, exclude_genes, polygon, select_valid, reparse, z_label) 522 #Load selected genes 523 self.df = dd.read_parquet(filter_filelist) --> 524 self.shape = (self.df.shape[0].compute(), self.df.shape[1]) 525 else: 526 #Load all genes

c:\Anaconda\envs\my_env\lib\site-packages\dask\base.py in compute(self, kwargs) 288 dask.base.compute 289 \"\"\" --> 290 (result,) = compute(self, traverse=False, kwargs) 291 return result 292

c:\Anaconda\envs\my_env\lib\site-packages\dask\base.py in compute(traverse, optimize_graph, scheduler, get, args, kwargs) 571 postcomputes.append(x.__dask_postcompute__()) 572 --> 573 results = schedule(dsk, keys, kwargs) 574 return repack([f(r, a) for r, (f, a) in zip(results, postcomputes)]) 575

c:\Anaconda\envs\my_env\lib\site-packages\dask\threaded.py in get(dsk, result, cache, num_workers, pool, kwargs) 87 get_id=_thread_get_id, 88 pack_exception=pack_exception, ---> 89 kwargs, 90 ) 91

c:\Anaconda\envs\my_env\lib\site-packages\dask\local.py in get_async(submit, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, chunksize, **kwargs) 504 _execute_task(task, data) # Re-execute locally 505 else: --> 506 raise_exception(exc, tb) 507 res, worker_id = loads(res_info) 508 state[\"cache\"][key] = res

c:\Anaconda\envs\my_env\lib\site-packages\dask\local.py in reraise(exc, tb) 312 if exc.traceback is not tb: 313 raise exc.with_traceback(tb) --> 314 raise exc 315 316

c:\Anaconda\envs\my_env\lib\site-packages\dask\local.py in execute_task(key, task_info, dumps, loads, get_id, pack_exception) 217 try: 218 task, data = loads(task_info) --> 219 result = _execute_task(task, data) 220 id = get_id() 221 result = dumps((result, id))

c:\Anaconda\envs\my_env\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk) 117 # temporaries by their reference count and can execute certain 118 # operations in-place. --> 119 return func(*(_execute_task(a, cache) for a in args)) 120 elif not ishashable(arg): 121 return arg

c:\Anaconda\envs\my_env\lib\site-packages\dask\optimization.py in call(self, *args) 967 if not len(args) == len(self.inkeys): 968 raise ValueError(\"Expected %d args, got %d\" % (len(self.inkeys), len(args))) --> 969 return core.get(self.dsk, self.outkey, dict(zip(self.inkeys, args))) 970 971 def reduce(self):

c:\Anaconda\envs\my_env\lib\site-packages\dask\core.py in get(dsk, out, cache) 147 for key in toposort(dsk): 148 task = dsk[key] --> 149 result = _execute_task(task, cache) 150 cache[key] = result 151 result = _execute_task(out, cache)

c:\Anaconda\envs\my_env\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk) 117 # temporaries by their reference count and can execute certain 118 # operations in-place. --> 119 return func(*(_execute_task(a, cache) for a in args)) 120 elif not ishashable(arg): 121 return arg

c:\Anaconda\envs\my_env\lib\site-packages\dask\dataframe\io\parquet\core.py in call(self, part) 95 self.columns, 96 self.index, ---> 97 self.common_kwargs, 98 ) 99

c:\Anaconda\envs\my_env\lib\site-packages\dask\dataframe\io\parquet\core.py in read_parquet_part(fs, engine, meta, part, columns, index, kwargs) 501 dfs = [ 502 func(fs, rg, columns.copy(), index, **toolz.merge(kwargs, kw)) --> 503 for (rg, kw) in part 504 ] 505 df = concat(dfs, axis=0) if len(dfs) > 1 else dfs[0]

c:\Anaconda\envs\my_env\lib\site-packages\dask\dataframe\io\parquet\core.py in (.0) 501 dfs = [ 502 func(fs, rg, columns.copy(), index, **toolz.merge(kwargs, kw)) --> 503 for (rg, kw) in part 504 ] 505 df = concat(dfs, axis=0) if len(dfs) > 1 else dfs[0]

c:\Anaconda\envs\my_env\lib\site-packages\dask\dataframe\io\parquet\fastparquet.py in read_partition(cls, fs, pieces, columns, index, categories, root_cats, root_file_scheme, base_path, kwargs) 1031 categories=categories, 1032 index=index, -> 1033 kwargs.get(\"read\", {}), 1034 ) 1035

c:\Anaconda\envs\my_env\lib\site-packages\dask\dataframe\io\parquet\fastparquet.py in pf_to_pandas(cls, pf, fs, columns, categories, index, open_file_options, kwargs) 1125 partition_meta=pf.partition_meta, 1126 infile=infile, -> 1127 kwargs, 1128 ) 1129 start += thislen

c:\Anaconda\envs\my_env\lib\site-packages\fastparquet\api.py in read_row_group_file(self, rg, columns, categories, index, assign, partition_meta, row_filter, infile) 363 selfmade=self.selfmade, index=index, 364 assign=assign, scheme=self.file_scheme, partition_meta=partition_meta, --> 365 row_filter=row_filter 366 ) 367 if ret:

c:\Anaconda\envs\my_env\lib\site-packages\fastparquet\core.py in read_row_group(file, rg, columns, categories, schema_helper, cats, selfmade, index, assign, scheme, partition_meta, row_filter) 607 raise RuntimeError('Going with pre-allocation!') 608 read_row_group_arrays(file, rg, columns, categories, schema_helper, --> 609 cats, selfmade, assign=assign, row_filter=row_filter) 610 611 for cat in cats:

c:\Anaconda\envs\my_env\lib\site-packages\fastparquet\core.py in read_row_group_arrays(file, rg, columns, categories, schema_helper, cats, selfmade, assign, row_filter) 581 selfmade=selfmade, assign=out[name], 582 catdef=out.get(name+'-catdef', None), --> 583 row_filter=row_filter) 584 585 if _is_map_like(schema_helper, column):

c:\Anaconda\envs\my_env\lib\site-packages\fastparquet\core.py in read_col(column, schema_helper, infile, use_cat, selfmade, assign, catdef, row_filter) 547 piece[:] = i.codes 548 elif d and not use_cat: --> 549 piece[:] = dic[val] 550 elif not use_cat: 551 piece[:] = convert(val, se)

IndexError: index 8389024 is out of bounds for axis 0 with size 66462" }
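The traceback bottoms out in fastparquet's dictionary decoding (`piece[:] = dic[val]` in `read_col`): a dictionary index (8389024) far larger than the dictionary itself (66462 entries). That usually points at a corrupted or partially written parquet file, or at a fastparquet version mismatch, rather than at FISHscale's own code. A minimal way to isolate it could be to read the parsed files directly, bypassing FISHscale and Dask (the glob path below is an assumption — point it at wherever FISHscale wrote its parsed parquet files):

```python
# Read each parsed parquet file directly to find out whether the files
# themselves or the fastparquet engine are at fault.
import glob
import pandas as pd

for f in glob.glob('path/to/parsed_data/*.parquet'):  # placeholder path
    try:
        pd.read_parquet(f, engine='fastparquet')  # engine seen in the traceback
        print(f, 'OK with fastparquet')
    except IndexError as e:
        print(f, 'fails with fastparquet:', e)
        # If pyarrow reads the same file without error, the culprit is more
        # likely a fastparquet bug/version mismatch than a corrupted file.
        pd.read_parquet(f, engine='pyarrow')
        print(f, 'OK with pyarrow')
```

If a file fails with both engines, deleting the parsed data and re-running with `reparse=True` (a parameter visible in the `__init__` signature above) should regenerate it; if only fastparquet fails, upgrading or pinning `fastparquet` is the more likely fix.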