NeurodataWithoutBorders / pynwb

A Python API for working with Neurodata stored in the NWB Format
https://pynwb.readthedocs.io
Other
176 stars 84 forks source link

Bug in appending to NWB file #1239

Closed lfrank closed 2 years ago

lfrank commented 4 years ago

Description

I'm getting errors when I try to open an NWB file for appending

Steps to Reproduce

Download https://www.dropbox.com/s/gxdz6zbdp6ayxhu/beans20190718s1.nwb?dl=0

Run

import os
import pynwb

nwb_file_name = '/data/nwb_builder_test_data/beans20190718s1.nwb'

in_io  = pynwb.NWBHDF5IO(nwb_file_name, 'r')
nwbf_in = in_io.read()
nwbf_out = nwbf_in.copy()

n_linked_files = 0
nwb_out_file_name = os.path.splitext(nwb_file_name)[0] + str(n_linked_files).zfill(5) + '.nwb'

print(f'writing new NWB file {nwb_out_file_name}')
with pynwb.NWBHDF5IO(nwb_out_file_name, mode='w', manager=in_io.manager) as io:
    io.write(nwbf_out)
    io.close()

io = pynwb.NWBHDF5IO(nwb_out_file_name, 'r')
nwbf = io.read()
print(f'test read of created file: {nwbf}')
io.close()
print(f'END OF TEST READ')

io = pynwb.NWBHDF5IO(nwb_out_file_name, 'a')
nwbf = io.read()
print(f'test append to created file: {nwbf}')
io.write(nwbf)
io.close()

Environment

Python Executable: Conda 
Python Version: Python  3.7
Operating System:  macOS 
HDMF Version: 1.3.0

outputfile.txt

lfrank commented 4 years ago

Another effort that produces a different error. Let me know if this should be a separate bug report:

import os
import pynwb
import ndx_fl_novela.probe
import numpy as np

nwb_file_name = '/data/nwb_builder_test_data/beans20190718s1.nwb'

in_io = pynwb.NWBHDF5IO(nwb_file_name, 'r')
nwbf_in = in_io.read()

nwbf_out = nwbf_in.copy()

n_linked_files = 0
# name the file, adding the number of links with preceeding zeros
nwb_out_file_name = os.path.splitext(nwb_file_name)[0] + str(n_linked_files).zfill(5) + '.nwb'

# write the linked file
print(f'writing new NWB file {nwb_out_file_name}')
with pynwb.NWBHDF5IO(nwb_out_file_name, mode='a', manager=in_io.manager) as io:
    io.write(nwbf_out)

in_io.close()
# test append to file 

with pynwb.NWBHDF5IO(nwb_out_file_name, "r") as io:
    nwbf_new = io.read()

test = np.ones((10,1))
nwbf_new.add_scratch(test, "test data", notes="test")

with pynwb.NWBHDF5IO(nwb_out_file_name, "a") as io:
    io.write(nwbf_new)

results in

`writing new NWB file /data/nwb_builder_test_data/beans20190718s100000.nwb
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-15-fdcd9089340d> in <module>
     31 
     32 with pynwb.NWBHDF5IO(nwb_out_file_name, "a") as io:
---> 33     io.write(nwbf_new)

~/opt/anaconda3/envs/nwbdj/lib/python3.7/site-packages/hdmf/utils.py in func_call(*args, **kwargs)
    481 
    482             if is_method:
--> 483                 return func(args[0], **parsed['args'])
    484             else:
    485                 return func(**parsed['args'])

~/opt/anaconda3/envs/nwbdj/lib/python3.7/site-packages/hdmf/backends/hdf5/h5tools.py in write(self, **kwargs)
    278 
    279         cache_spec = popargs('cache_spec', kwargs)
--> 280         call_docval_func(super().write, kwargs)
    281         if cache_spec:
    282             ref = self.__file.attrs.get(SPEC_LOC_ATTR)

~/opt/anaconda3/envs/nwbdj/lib/python3.7/site-packages/hdmf/utils.py in call_docval_func(func, kwargs)
    362 def call_docval_func(func, kwargs):
    363     fargs, fkwargs = fmt_docval_args(func, kwargs)
--> 364     return func(*fargs, **fkwargs)
    365 
    366 

~/opt/anaconda3/envs/nwbdj/lib/python3.7/site-packages/hdmf/utils.py in func_call(*args, **kwargs)
    481 
    482             if is_method:
--> 483                 return func(args[0], **parsed['args'])
    484             else:
    485                 return func(**parsed['args'])

~/opt/anaconda3/envs/nwbdj/lib/python3.7/site-packages/hdmf/backends/io.py in write(self, **kwargs)
     41     def write(self, **kwargs):
     42         container = popargs('container', kwargs)
---> 43         f_builder = self.__manager.build(container, source=self.__source)
     44         self.write_builder(f_builder, **kwargs)
     45 

~/opt/anaconda3/envs/nwbdj/lib/python3.7/site-packages/hdmf/utils.py in func_call(*args, **kwargs)
    481 
    482             if is_method:
--> 483                 return func(args[0], **parsed['args'])
    484             else:
    485                 return func(**parsed['args'])

~/opt/anaconda3/envs/nwbdj/lib/python3.7/site-packages/hdmf/build/manager.py in build(self, **kwargs)
    156                                          % (container.name, container.__class__.__module__,
    157                                             container.__class__.__name__))
--> 158             result = self.__type_map.build(container, self, source=source, spec_ext=spec_ext)
    159             self.prebuilt(container, result)
    160         elif container.modified or spec_ext is not None:

~/opt/anaconda3/envs/nwbdj/lib/python3.7/site-packages/hdmf/utils.py in func_call(*args, **kwargs)
    481 
    482             if is_method:
--> 483                 return func(args[0], **parsed['args'])
    484             else:
    485                 return func(**parsed['args'])

~/opt/anaconda3/envs/nwbdj/lib/python3.7/site-packages/hdmf/build/manager.py in build(self, **kwargs)
    743         if manager is None:
    744             manager = BuildManager(self)
--> 745         builder = obj_mapper.build(container, manager, builder=builder, source=source, spec_ext=spec_ext)
    746 
    747         # add additional attributes (namespace, data_type, object_id) to builder

~/opt/anaconda3/envs/nwbdj/lib/python3.7/site-packages/hdmf/utils.py in func_call(*args, **kwargs)
    481 
    482             if is_method:
--> 483                 return func(args[0], **parsed['args'])
    484             else:
    485                 return func(**parsed['args'])

~/opt/anaconda3/envs/nwbdj/lib/python3.7/site-packages/hdmf/build/objectmapper.py in build(self, **kwargs)
    562                 builder = GroupBuilder(name, parent=parent, source=source)
    563             self.__add_datasets(builder, self.__spec.datasets, container, manager, source)
--> 564             self.__add_groups(builder, self.__spec.groups, container, manager, source)
    565             self.__add_links(builder, self.__spec.links, container, manager, source)
    566         else:

~/opt/anaconda3/envs/nwbdj/lib/python3.7/site-packages/hdmf/build/objectmapper.py in __add_groups(self, builder, groups, container, build_manager, source)
    787                             if isinstance(item, Container):
    788                                 self.__add_containers(sub_builder, spec, item, build_manager, source, container)
--> 789                 self.__add_groups(sub_builder, spec.groups, container, build_manager, source)
    790                 empty = sub_builder.is_empty()
    791                 if not empty or (empty and isinstance(spec.quantity, int)):

~/opt/anaconda3/envs/nwbdj/lib/python3.7/site-packages/hdmf/build/objectmapper.py in __add_groups(self, builder, groups, container, build_manager, source)
    803                     attr_value = self.get_attr_value(spec, container, build_manager)
    804                     if attr_value is not None:
--> 805                         self.__add_containers(builder, spec, attr_value, build_manager, source, container)
    806 
    807     def __add_containers(self, builder, spec, value, build_manager, source, parent_container):

~/opt/anaconda3/envs/nwbdj/lib/python3.7/site-packages/hdmf/build/objectmapper.py in __add_containers(self, builder, spec, value, build_manager, source, parent_container)
    852             for container in values:
    853                 if container:
--> 854                     self.__add_containers(builder, spec, container, build_manager, source, parent_container)
    855 
    856     def __get_subspec_values(self, builder, spec, manager):

~/opt/anaconda3/envs/nwbdj/lib/python3.7/site-packages/hdmf/build/objectmapper.py in __add_containers(self, builder, spec, value, build_manager, source, parent_container)
    833                    value.parent is not parent_container:
    834                     if isinstance(spec, BaseStorageSpec):
--> 835                         rendered_obj = build_manager.build(value, source=source, spec_ext=spec)
    836                     else:
    837                         rendered_obj = build_manager.build(value, source=source)

~/opt/anaconda3/envs/nwbdj/lib/python3.7/site-packages/hdmf/utils.py in func_call(*args, **kwargs)
    481 
    482             if is_method:
--> 483                 return func(args[0], **parsed['args'])
    484             else:
    485                 return func(**parsed['args'])

~/opt/anaconda3/envs/nwbdj/lib/python3.7/site-packages/hdmf/build/manager.py in build(self, **kwargs)
    155                         raise ValueError("Cannot change container_source once set: '%s' %s.%s"
    156                                          % (container.name, container.__class__.__module__,
--> 157                                             container.__class__.__name__))
    158             result = self.__type_map.build(container, self, source=source, spec_ext=spec_ext)
    159             self.prebuilt(container, result)

ValueError: Cannot change container_source once set: 'e-series' pynwb.ecephys.ElectricalSeries`
bendichter commented 3 years ago

@rly it looks like you were tackling this. Is this still a bug?

anbhimi commented 3 years ago

@lfrank, I am facing the same issue while saving the NWB file. Please let me know if you found a way out. Thank You

miketrumpis commented 2 years ago

The procedure @lfrank described to make sequential writes on a shallow-copy file is still raising an exception for me. Here are the relevant versions

h5py: version: 3.6.0
hdmf: version: 3.2.1
pynwb: version: 2.0.0

Here's a gist that abstracts the procedure: https://gist.github.com/miketrumpis/1779483dd9fa43e06cc86bc43952611c

The exception now is from h5py

RuntimeError: Unable to create external link (name already exists)

And the relevant point in the traceback, the "name" of the link is the name "raw_acq" of the acquisition from the "nwb-raw.nwb" file.

Thanks!

rly commented 2 years ago

@miketrumpis Thank you for the detailed bug report on this issue. It looks like this is similar to the original issue but not exactly the same. The PR https://github.com/hdmf-dev/hdmf/pull/709 should fix the issue. Either now or once merged into the dev branch of HDMF, could you please test it out? Otherwise I can let you know when we make a new release with the bugfix.

rly commented 2 years ago

The original issue appears to still exist somewhat, but a different error results than originally described.

On read of the test file in append mode, the following error results. Strangely, reading in read mode does not result in an error.

hdmf.build.errors.ConstructError: (root/general/extracellular_ephys/electrodes GroupBuilder {'attributes': {'colnames': array(['x', 'y', 'z', 'imp', 'location', 'filtering', 'group',
       'group_name', 'hwChan', 'ntrode_id', 'channel_id', 'bad_channel',
       'rel_x', 'rel_y', 'rel_z', 'probe_shank', 'probe_electrode',
       'ref_elect_id'], dtype=object), 'description': 'metadata about extracellular electrodes', 'namespace': 'hdmf-common', 'neurodata_type': 'DynamicTable', 'object_id': 'b63348f0-aa74-4d45-822b-3dc11ca9a84e'}, 'groups': {}, 'datasets': {}, 'links': {}}, "Could not construct DynamicTable object due to: Must supply 'columns' if specifying 'colnames'")
rly commented 2 years ago

The ConstructError that I mention above is a Windows-specific error that should be fixed with https://github.com/hdmf-dev/hdmf/pull/710.

The code described in the original bug report works correctly with the latest versions of PyNWB and HDMF.

rly commented 2 years ago

The code described in @lfrank 's second comment, specifically:

with pynwb.NWBHDF5IO(nwb_out_file_name, "r") as io:
    nwbf_new = io.read()

test = np.ones((10,1))
nwbf_new.add_scratch(test, "test data", notes="test")

with pynwb.NWBHDF5IO(nwb_out_file_name, "a") as io:
    io.write(nwbf_new)

Will result in an error because nwbf_new was read by one IO object and is written using a different IO object. A better code flow would be:

with pynwb.NWBHDF5IO(nwb_out_file_name, "r") as io:
    nwbf_new = io.read()

with pynwb.NWBHDF5IO(nwb_out_file_name, "a") as io:
    nwbf_new = io.read()
    test = np.ones((10,1))
    nwbf_new.add_scratch(test, "test data", notes="test")
    io.write(nwbf_new)
rly commented 2 years ago

I believe these appending issues are resolved, but please feel free to reopen or create a new issue ticket if not.

miketrumpis commented 2 years ago

I hope I have set up the environment right, but I still see a similar result with this version combo:

pip install --no-deps "hdmf @ git+https://github.com/hdmf-dev/hdmf.git@dev"
h5py: version: 3.6.0
hdmf: version: 3.2.1.post.dev6
pynwb: version: 2.0.1

The TB has shifted somewhat to a different point

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Input In [6], in <module>
     13 filter_container.add_electrical_series(filt_ephys)
     14 with NWBHDF5IO('nwb-etl.nwb', mode='r+', manager=etl_io.manager) as etl_io2:
---> 15     etl_io2.write(nwb_etl)

File ~/.pyenv/versions/3.9.7/envs/nwb-basic/lib/python3.9/site-packages/hdmf/utils.py:583, in docval.<locals>.dec.<locals>.func_call(*args, **kwargs)
    581 def func_call(*args, **kwargs):
    582     pargs = _check_args(args, kwargs)
--> 583     return func(args[0], **pargs)

File ~/.pyenv/versions/3.9.7/envs/nwb-basic/lib/python3.9/site-packages/hdmf/backends/hdf5/h5tools.py:354, in HDF5IO.write(self, **kwargs)
    349     raise UnsupportedOperation(("Cannot write to file %s in mode '%s'. "
    350                                 "Please use mode 'r+', 'w', 'w-', 'x', or 'a'")
    351                                % (self.source, self.__mode))
    353 cache_spec = popargs('cache_spec', kwargs)
--> 354 call_docval_func(super().write, kwargs)
    355 if cache_spec:
    356     self.__cache_spec()

File ~/.pyenv/versions/3.9.7/envs/nwb-basic/lib/python3.9/site-packages/hdmf/utils.py:424, in call_docval_func(func, kwargs)
    422 def call_docval_func(func, kwargs):
    423     fargs, fkwargs = fmt_docval_args(func, kwargs)
--> 424     return func(*fargs, **fkwargs)

File ~/.pyenv/versions/3.9.7/envs/nwb-basic/lib/python3.9/site-packages/hdmf/utils.py:583, in docval.<locals>.dec.<locals>.func_call(*args, **kwargs)
    581 def func_call(*args, **kwargs):
    582     pargs = _check_args(args, kwargs)
--> 583     return func(args[0], **pargs)

File ~/.pyenv/versions/3.9.7/envs/nwb-basic/lib/python3.9/site-packages/hdmf/backends/io.py:51, in HDMFIO.write(self, **kwargs)
     49 container = popargs('container', kwargs)
     50 f_builder = self.__manager.build(container, source=self.__source, root=True)
---> 51 self.write_builder(f_builder, **kwargs)

File ~/.pyenv/versions/3.9.7/envs/nwb-basic/lib/python3.9/site-packages/hdmf/utils.py:583, in docval.<locals>.dec.<locals>.func_call(*args, **kwargs)
    581 def func_call(*args, **kwargs):
    582     pargs = _check_args(args, kwargs)
--> 583     return func(args[0], **pargs)

File ~/.pyenv/versions/3.9.7/envs/nwb-basic/lib/python3.9/site-packages/hdmf/backends/hdf5/h5tools.py:762, in HDF5IO.write_builder(self, **kwargs)
    759 self.logger.debug("Writing GroupBuilder '%s' to path '%s' with kwargs=%s"
    760                   % (f_builder.name, self.source, kwargs))
    761 for name, gbldr in f_builder.groups.items():
--> 762     self.write_group(self.__file, gbldr, **kwargs)
    763 for name, dbldr in f_builder.datasets.items():
    764     self.write_dataset(self.__file, dbldr, **kwargs)

File ~/.pyenv/versions/3.9.7/envs/nwb-basic/lib/python3.9/site-packages/hdmf/utils.py:583, in docval.<locals>.dec.<locals>.func_call(*args, **kwargs)
    581 def func_call(*args, **kwargs):
    582     pargs = _check_args(args, kwargs)
--> 583     return func(args[0], **pargs)

File ~/.pyenv/versions/3.9.7/envs/nwb-basic/lib/python3.9/site-packages/hdmf/backends/hdf5/h5tools.py:938, in HDF5IO.write_group(self, **kwargs)
    936 else:
    937     self.logger.debug("    Creating group '%s'" % builder.name)
--> 938     group = parent.create_group(builder.name)
    939 # write all groups
    940 subgroups = builder.groups

File ~/.pyenv/versions/3.9.7/envs/nwb-basic/lib/python3.9/site-packages/h5py/_hl/group.py:66, in Group.create_group(self, name, track_order)
     64 name, lcpl = self._e(name, lcpl=True)
     65 gcpl = Group._gcpl_crt_order if track_order else None
---> 66 gid = h5g.create(self.id, name, lcpl=lcpl, gcpl=gcpl)
     67 return Group(gid)

File h5py/_objects.pyx:54, in h5py._objects.with_phil.wrapper()

File h5py/_objects.pyx:55, in h5py._objects.with_phil.wrapper()

File h5py/h5g.pyx:166, in h5py.h5g.create()

ValueError: Unable to create group (name already exists)
rly commented 2 years ago

@miketrumpis could you share the code that you used that resulted in the above error? The top-level code snippet from the TB does not match the code in gist.github.com/miketrumpis/1779483dd9fa43e06cc86bc43952611c

miketrumpis commented 2 years ago

I'm sorry, that was a false alarm. I can run the gist demo with the HDMF dev branch.

I had made a small change to my notebook as an attempted work-around. Nice 👀 work!