LorenFrankLab / rec_to_nwb

Data Migration REC -> NWB 2.0 Service

nwb file initialized at the wrong size: could not write file. nwb builder is fine, but cannot write into nwb. #24

Closed · shijiegu closed this issue 2 years ago

shijiegu commented 2 years ago
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<timed exec> in <module>

~/Documents/rec_to_nwb/rec_to_nwb/processing/builder/raw_to_nwb_builder.py in build_nwb(self, run_preprocessing, process_mda_valid_time, process_mda_invalid_time, process_pos_valid_time, process_pos_invalid_time)
    228             process_mda_invalid_time=process_mda_invalid_time,
    229             process_pos_valid_time=process_pos_valid_time,
--> 230             process_pos_invalid_time=process_pos_invalid_time)
    231         logger.info('Done...\n')
    232 

~/Documents/rec_to_nwb/rec_to_nwb/processing/builder/raw_to_nwb_builder.py in __build_nwb_file(self, process_mda_valid_time, process_mda_invalid_time, process_pos_valid_time, process_pos_invalid_time)
    243             nwb_builder = self.get_nwb_builder(date)
    244             content = nwb_builder.build()
--> 245             nwb_builder.write(content)
    246             if self.is_old_dataset:
    247                 logger.info('(old dataset: skipping append_to_nwb)')

~/Documents/rec_to_nwb/rec_to_nwb/processing/builder/nwb_file_builder.py in write(self, content)
    430         logger.info('Writing down content to ' + self.output_file)
    431         with NWBHDF5IO(path=self.output_file, mode='w') as nwb_fileIO:
--> 432             nwb_fileIO.write(content)
    433             nwb_fileIO.close()
    434 

~/anaconda3/envs/rec_to_nwb/lib/python3.7/site-packages/hdmf/utils.py in func_call(*args, **kwargs)
    581             def func_call(*args, **kwargs):
    582                 pargs = _check_args(args, kwargs)
--> 583                 return func(args[0], **pargs)
    584         else:
    585             def func_call(*args, **kwargs):

~/anaconda3/envs/rec_to_nwb/lib/python3.7/site-packages/hdmf/backends/hdf5/h5tools.py in write(self, **kwargs)
    405 
    406         cache_spec = popargs('cache_spec', kwargs)
--> 407         call_docval_func(super().write, kwargs)
    408         if cache_spec:
    409             self.__cache_spec()

~/anaconda3/envs/rec_to_nwb/lib/python3.7/site-packages/hdmf/utils.py in call_docval_func(func, kwargs)
    422 def call_docval_func(func, kwargs):
    423     fargs, fkwargs = fmt_docval_args(func, kwargs)
--> 424     return func(*fargs, **fkwargs)
    425 
    426 

~/anaconda3/envs/rec_to_nwb/lib/python3.7/site-packages/hdmf/utils.py in func_call(*args, **kwargs)
    581             def func_call(*args, **kwargs):
    582                 pargs = _check_args(args, kwargs)
--> 583                 return func(args[0], **pargs)
    584         else:
    585             def func_call(*args, **kwargs):

~/anaconda3/envs/rec_to_nwb/lib/python3.7/site-packages/hdmf/backends/io.py in write(self, **kwargs)
     48         container = popargs('container', kwargs)
     49         f_builder = self.__manager.build(container, source=self.__source, root=True)
---> 50         self.write_builder(f_builder, **kwargs)
     51 
     52     @docval({'name': 'src_io', 'type': 'HDMFIO', 'doc': 'the HDMFIO object for reading the data to export'},

~/anaconda3/envs/rec_to_nwb/lib/python3.7/site-packages/hdmf/utils.py in func_call(*args, **kwargs)
    581             def func_call(*args, **kwargs):
    582                 pargs = _check_args(args, kwargs)
--> 583                 return func(args[0], **pargs)
    584         else:
    585             def func_call(*args, **kwargs):

~/anaconda3/envs/rec_to_nwb/lib/python3.7/site-packages/hdmf/backends/hdf5/h5tools.py in write_builder(self, **kwargs)
    804                           % (f_builder.name, self.source, kwargs))
    805         for name, gbldr in f_builder.groups.items():
--> 806             self.write_group(self.__file, gbldr, **kwargs)
    807         for name, dbldr in f_builder.datasets.items():
    808             self.write_dataset(self.__file, dbldr, **kwargs)

~/anaconda3/envs/rec_to_nwb/lib/python3.7/site-packages/hdmf/utils.py in func_call(*args, **kwargs)
    581             def func_call(*args, **kwargs):
    582                 pargs = _check_args(args, kwargs)
--> 583                 return func(args[0], **pargs)
    584         else:
    585             def func_call(*args, **kwargs):

~/anaconda3/envs/rec_to_nwb/lib/python3.7/site-packages/hdmf/backends/hdf5/h5tools.py in write_group(self, **kwargs)
    993             for subgroup_name, sub_builder in subgroups.items():
    994                 # do not create an empty group without attributes or links
--> 995                 self.write_group(group, sub_builder, **kwargs)
    996         # write all datasets
    997         datasets = builder.datasets

~/anaconda3/envs/rec_to_nwb/lib/python3.7/site-packages/hdmf/utils.py in func_call(*args, **kwargs)
    581             def func_call(*args, **kwargs):
    582                 pargs = _check_args(args, kwargs)
--> 583                 return func(args[0], **pargs)
    584         else:
    585             def func_call(*args, **kwargs):

~/anaconda3/envs/rec_to_nwb/lib/python3.7/site-packages/hdmf/backends/hdf5/h5tools.py in write_group(self, **kwargs)
    993             for subgroup_name, sub_builder in subgroups.items():
    994                 # do not create an empty group without attributes or links
--> 995                 self.write_group(group, sub_builder, **kwargs)
    996         # write all datasets
    997         datasets = builder.datasets

~/anaconda3/envs/rec_to_nwb/lib/python3.7/site-packages/hdmf/utils.py in func_call(*args, **kwargs)
    581             def func_call(*args, **kwargs):
    582                 pargs = _check_args(args, kwargs)
--> 583                 return func(args[0], **pargs)
    584         else:
    585             def func_call(*args, **kwargs):

~/anaconda3/envs/rec_to_nwb/lib/python3.7/site-packages/hdmf/backends/hdf5/h5tools.py in write_group(self, **kwargs)
    993             for subgroup_name, sub_builder in subgroups.items():
    994                 # do not create an empty group without attributes or links
--> 995                 self.write_group(group, sub_builder, **kwargs)
    996         # write all datasets
    997         datasets = builder.datasets

~/anaconda3/envs/rec_to_nwb/lib/python3.7/site-packages/hdmf/utils.py in func_call(*args, **kwargs)
    581             def func_call(*args, **kwargs):
    582                 pargs = _check_args(args, kwargs)
--> 583                 return func(args[0], **pargs)
    584         else:
    585             def func_call(*args, **kwargs):

~/anaconda3/envs/rec_to_nwb/lib/python3.7/site-packages/hdmf/backends/hdf5/h5tools.py in write_group(self, **kwargs)
    998         if datasets:
    999             for dset_name, sub_builder in datasets.items():
-> 1000                 self.write_dataset(group, sub_builder, **kwargs)
   1001         # write all links
   1002         links = builder.links

~/anaconda3/envs/rec_to_nwb/lib/python3.7/site-packages/hdmf/utils.py in func_call(*args, **kwargs)
    581             def func_call(*args, **kwargs):
    582                 pargs = _check_args(args, kwargs)
--> 583                 return func(args[0], **pargs)
    584         else:
    585             def func_call(*args, **kwargs):

~/anaconda3/envs/rec_to_nwb/lib/python3.7/site-packages/hdmf/backends/hdf5/h5tools.py in write_dataset(self, **kwargs)
   1294         self.__set_written(builder)
   1295         if exhaust_dci:
-> 1296             self.__exhaust_dcis()
   1297 
   1298     @classmethod

~/anaconda3/envs/rec_to_nwb/lib/python3.7/site-packages/hdmf/backends/hdf5/h5tools.py in __exhaust_dcis(self)
    845             self.logger.debug("Exhausting DataChunkIterator from queue (length %d)" % len(self.__dci_queue))
    846             dset, data = self.__dci_queue.popleft()
--> 847             if self.__write_chunk__(dset, data):
    848                 self.__dci_queue.append((dset, data))
    849 

~/anaconda3/envs/rec_to_nwb/lib/python3.7/site-packages/hdmf/backends/hdf5/h5tools.py in __write_chunk__(cls, dset, data)
   1391         dset.id.extend(max_bounds)
   1392         # Write the data
-> 1393         dset[chunk_i.selection] = chunk_i.data
   1394 
   1395         return True

h5py/_objects.pyx in h5py._objects.with_phil.wrapper()

h5py/_objects.pyx in h5py._objects.with_phil.wrapper()

~/anaconda3/envs/rec_to_nwb/lib/python3.7/site-packages/h5py/_hl/dataset.py in __setitem__(self, args, val)
    947 
    948         # Perform the write, with broadcasting
--> 949         mspace = h5s.create_simple(selection.expand_shape(mshape))
    950         for fspace in selection.broadcast(mshape):
    951             self.id.write(mspace, fspace, val, mtype, dxpl=self._dxpl)

~/anaconda3/envs/rec_to_nwb/lib/python3.7/site-packages/h5py/_hl/selections.py in expand_shape(self, source_shape)
    262                     eshape.append(t)
    263                 else:
--> 264                     raise TypeError("Can't broadcast %s -> %s" % (source_shape, self.array_shape))  # array shape
    265 
    266         if any([n > 1 for n in remaining_src_dims]):

TypeError: Can't broadcast (112007,) -> (112008,)
shijiegu commented 2 years ago

Upon digging deeper, I realized the issue is that the position timestamps have duplicates. This creates a mismatch between the actual data shape and the shape reported by the shape-query functions. In the case of the position timestamp manager, which inherits from the timestamps manager, the shape() function that the NWB builder uses to initialize the empty array may call this function defined in the timestamps manager:

def _get_data_shape(self, dataset_num):
    return np.shape(self.read_timestamps_ids(dataset_num))[0]
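For illustration only (this helper is hypothetical and not part of rec_to_nwb), a quick check on the raw position timestamps makes the duplicate count, and hence the length difference, visible; it assumes the timestamps are a 1-D numpy array such as the one returned by read_timestamps_ids():

import numpy as np

def count_duplicate_timestamps(timestamps):
    # Hypothetical diagnostic: compare the raw timestamp count
    # (what _get_data_shape() would report) against the count after
    # dropping duplicate values.
    timestamps = np.asarray(timestamps)
    n_raw = timestamps.size                # raw length, duplicates included
    n_unique = np.unique(timestamps).size  # length after removing duplicates
    print(f"raw length {n_raw}, unique length {n_unique}, "
          f"{n_raw - n_unique} duplicate(s)")
    return n_raw - n_unique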

Yet the position timestamp manager's retrieve_real_timestamps(), which returns the actual data, yields a shape different from read_timestamps_ids() in the case of PTP data.
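A minimal sketch of how this surfaces at write time, assuming the dataset is pre-allocated from the precomputed length while the data actually written is one element shorter (the file name and the two lengths below are only illustrative, taken from the traceback above):

import h5py
import numpy as np

precomputed_len = 112008  # length reported by the shape query (duplicates still counted)
actual_len = 112007       # length of the data actually returned (duplicate dropped)

with h5py.File("example.h5", "w") as f:
    dset = f.create_dataset("timestamps", shape=(precomputed_len,), dtype="f8")
    data = np.arange(actual_len, dtype="f8")
    # Writing 112007 values into a 112008-element selection fails with
    # TypeError: Can't broadcast (112007,) -> (112008,)
    dset[0:precomputed_len] = data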

shijiegu commented 2 years ago

The issue was resolved by a bug fix from Eric.