# coding: utf-8
#/*##########################################################################
# Copyright (C) 2016 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
#############################################################################*/
"""Nested python dictionary to HDF5 file conversion"""
import h5py
import numpy
import sys
__authors__ = ["P. Knobel"]
__license__ = "MIT"
__date__ = "22/03/2016"

# Tuple of the "text" types of the running interpreter, suitable as second
# argument of isinstance(): ``basestring`` on Python 2, ``str`` otherwise.
if sys.version_info[0] == 2:
    string_types = (basestring,)
else:
    string_types = (str,)
def _prepare_hdf5_dataset(array_like):
    """Cast a python object into a numpy array in a HDF5 friendly format.

    Text data (numpy dtype kinds ``"U"`` and ``"S"``) is converted to
    fixed-length byte strings. Data of any other kind (integers, unsigned
    integers, floats…) is returned with its dtype untouched.

    :param array_like: Input dataset in a type that can be digested by
        ``numpy.array()`` (`str`, `list`, `numpy.ndarray`…)
    :return: ``numpy.ndarray`` ready to be written as an HDF5 dataset
    """
    # Ensure our data is a numpy.ndarray. Plain python strings need no
    # special casing: numpy.array() turns them into 0-d "U"/"S" arrays,
    # which are handled by the text conversion below.
    if isinstance(array_like, numpy.ndarray):
        array = array_like
    else:
        array = numpy.array(array_like)

    # unicode: convert to byte strings
    # (http://docs.h5py.org/en/latest/strings.html)
    # The kind code is case sensitive: "U" is unicode text and "S" is byte
    # strings, whereas lowercase "u" is *unsigned integer* and must not be
    # touched. numpy.bytes_ is used because the numpy.string_ alias no longer
    # exists in NumPy 2.
    # NOTE(review): non-ASCII text is presumably rejected by numpy in this
    # conversion (UnicodeEncodeError) -- confirm if such data is expected.
    if array.dtype.kind in ("U", "S"):
        array = numpy.asarray(array, dtype=numpy.bytes_)

    return array
def _h5_path_is_writable(h5f, path, overwrite_data, keep_group=False):
    """Tell whether ``path`` may be (re)created in ``h5f``, freeing it if allowed.

    :param h5f: Open ``h5py.File`` handle
    :param path: Absolute path of the member about to be written
    :param overwrite_data: If ``True``, an existing member at ``path`` is
        deleted so it can be written again
    :param keep_group: If ``True``, an existing *group* at ``path`` is left
        in place and reported as writable, so new members can be merged
        into it
    :return: ``True`` if the caller can write at ``path``, ``False`` if the
        existing member must be left alone (the caller should skip it)
    """
    if path not in h5f:
        return True
    if keep_group and isinstance(h5f[path], h5py.Group):
        return True
    if not overwrite_data:
        return False
    del h5f[path]
    return True


def dicttoh5(treedict, h5file, h5path='/',
             mode="a", overwrite_data=False,
             create_dataset_args=None):
    """Write a nested dictionary to a HDF5 file, using keys as member names.

    :param treedict: Nested dictionary/tree structure with strings as keys
         and array-like objects as leafs. The ``"/"`` character is not allowed
         in keys. A ``None`` leaf or an empty dictionary produces an empty
         group.
    :param h5file: HDF5 file name or handle. If a file name is provided, the
        function opens the file in the specified mode and closes it again
        before completing (also when an error occurs).
    :param h5path: Target path in HDF5 file in which scan groups are created.
        Default is root (``"/"``)
    :param mode: Can be ``"r+"`` (read/write, file must exist),
        ``"w"`` (write, existing file is lost), ``"w-"`` (write, fail if
        exists) or ``"a"`` (read/write if exists, create otherwise).
        This parameter is ignored if ``h5file`` is a file handle.
    :param overwrite_data: If ``True``, existing groups and datasets can be
        overwritten, if ``False`` they are skipped. This parameter is only
        relevant if ``mode`` is ``"r+"`` or ``"a"``.
    :param create_dataset_args: Dictionary of args you want to pass to
        ``h5f.create_dataset``. This allows you to specify filters and
        compression parameters. Don't specify ``name`` and ``data``.

    Example::

        from silx.io.dicttoh5 import dicttoh5

        city_area = {
            "Europe": {
                "France": {
                    "Isère": {
                        "Grenoble": "18.44 km2"
                    },
                    "Nord": {
                        "Tourcoing": "15.19 km2"
                    },
                },
            },
        }

        create_ds_args = {'compression': "gzip",
                          'shuffle': True,
                          'fletcher32': True}

        dicttoh5(city_area, "cities.h5", h5path="/area",
                 create_dataset_args=create_ds_args)
    """
    # Remember whether the file is opened here: only then is it ours to
    # close. Recursive calls receive the handle and never close it.
    opened_here = not isinstance(h5file, h5py.File)
    h5f = h5py.File(h5file, mode) if opened_here else h5file

    try:
        if not h5path.endswith("/"):
            h5path += "/"

        for key in treedict:
            assert isinstance(key, string_types), \
                "dictionary keys must be strings"
            assert "/" not in key, \
                "the '/' character is not allowed in keys"

            value = treedict[key]
            target = h5path + key

            if isinstance(value, dict) and value:
                # non-empty group: recurse (merging into an existing group)
                if _h5_path_is_writable(h5f, target, overwrite_data,
                                        keep_group=True):
                    dicttoh5(value, h5f, target,
                             overwrite_data=overwrite_data,
                             create_dataset_args=create_dataset_args)

            elif value is None or isinstance(value, dict):
                # None or empty dictionary: create empty group
                if _h5_path_is_writable(h5f, target, overwrite_data):
                    h5f.create_group(target)

            else:
                if not _h5_path_is_writable(h5f, target, overwrite_data):
                    continue
                ds = _prepare_hdf5_dataset(value)
                # can't apply filters on scalars (datasets with shape == () )
                if ds.shape == () or create_dataset_args is None:
                    h5f.create_dataset(target, data=ds)
                else:
                    h5f.create_dataset(target, data=ds,
                                       **create_dataset_args)
    finally:
        if opened_here:
            h5f.close()