-
Notifications
You must be signed in to change notification settings - Fork 22
Open
Milestone
Description
With the changes in 1.3.7 we are sporadically getting the following error:
File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper
File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper
File "/codemill/bessen/versioned_hdf5_env/lib64/python3.7/site-packages/h5py/_hl/files.py", line 461, in __exit__
self.close()
File "/codemill/bessen/versioned_hdf5_env/lib64/python3.7/site-packages/h5py/_hl/files.py", line 431, in close
id_list = h5f.get_obj_ids(self.id, ~h5f.OBJ_FILE)
File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper
File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper
File "h5py/h5f.pyx", line 269, in h5py.h5f.get_obj_ids
RuntimeError: Can't increment id ref count (can't locate ID)
I have tracked this down to this change, e2509ae, which changes the way `Hashtable` is cached.
- Why did we need the `lru_cache` in the first place? I assume this addresses some performance problem?
- Is caching anything containing a reference to the `h5py.File` `f` invalid anyway?
Below is a reproducer which seems to cause this to happen with high probability:
import h5py
import numpy as np
from versioned_hdf5 import VersionedHDF5File
# Names of the datasets stored under the 'data' group: 'ds0' through 'ds9'.
ds_names = [f'ds{index}' for index in range(10)]
# Exclusive upper bound of the version loop in f2, which stages
# versions '1' through str(num_versions - 1).
num_versions = 101
def f1():
    """Create 'foo.h5' and stage version '0' with one dataset per name.

    A 'data' group is created inside the staged version, and every name in
    ds_names gets a dataset initialised to np.arange(4).
    """
    with h5py.File('foo.h5', 'w') as f:
        versioned = VersionedHDF5File(f)
        with versioned.stage_version('0') as staged:
            group = staged.create_group('data')
            for ds_name in ds_names:
                group.create_dataset(ds_name, data=np.arange(4))
def f2():
    """Stage versions '1' through str(num_versions - 1) in 'foo.h5'.

    The file is reopened in 'r+' mode for every version. In version i each
    dataset named in ds_names is resized to length i + 4 and overwritten
    with np.arange(i, 2 * i + 4).
    """
    for version in range(1, num_versions):
        with h5py.File('foo.h5', 'r+') as f:
            versioned = VersionedHDF5File(f)
            with versioned.stage_version(str(version)) as staged:
                group = staged['data']
                for ds_name in ds_names:
                    dataset = group[ds_name]
                    dataset.resize((version + 4,))
                    dataset[:] = np.arange(version, 2 * version + 4)
def f3():
    """Return the keys found under '_version_data/versions' in 'foo.h5'.

    The file is opened read-only with plain h5py and the keys are copied
    into a list.
    """
    with h5py.File('foo.h5', 'r') as f:
        # Keep the group bound to a local name, as the original does, so it
        # stays referenced while the file is being closed.
        versions_group = f['_version_data/versions']
        names = list(versions_group.keys())
    return names
def f4(version_names):
    """Read every dataset in full for each named version of 'foo.h5'.

    Args:
        version_names: iterable of version names to look up through
            VersionedHDF5File; the '__first_version__' entry is skipped.
    """
    with h5py.File('foo.h5', 'r') as f:
        versioned = VersionedHDF5File(f)
        for label in version_names:
            if label == '__first_version__':
                continue
            committed = versioned[label]
            group = committed['data']
            for ds_name in ds_names:
                dataset = group[ds_name]
                # Read the whole dataset; the result is only held, never used.
                _ = dataset[:]
# Script entry point: create the file (f1), stage the additional versions (f2),
# then repeatedly list the version names (f3) and read them all back (f4).
# NOTE(review): indentation was lost in this paste, so it is unclear whether the
# second f3()/f4() pair belongs inside the `for` loop or follows it — confirm
# against the original reproducer before running.
if __name__ == '__main__':
f1()
f2()
for _ in range(10):
version_names = f3()
f4(version_names)
version_names = f3()
f4(version_names)
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels