Source code for simframe.io.writers.hdf5writer

import h5py
import numbers
import numpy as np
from typing import types
import os

from simframe.io.reader import Reader
from simframe.io.writer import Writer
from simframe.utils.simplenamespace import SimpleNamespace


class hdf5writer(Writer):
    """Writer that stores output in HDF5 files through ``h5py``."""

    def __init__(self, *args, **kwargs):
        """Set up the HDF5 writer.

        The keywords below are taken out of ``kwargs`` (falling back to the
        listed defaults) and handed to ``Writer.__init__`` together with the
        HDF5 write function and the matching reader class. All remaining
        positional and keyword arguments are forwarded unchanged.

        Keywords
        --------
        filename : str, default "data"
            Forwarded as ``filename`` to the base class
        extension : str, default "hdf5"
            Forwarded as ``extension`` to the base class
        description : str, default "HDF5 file format using h5py"
            Forwarded as ``description`` to the base class
        options : dict, default {"com": "lzf", "comopts": None}
            Forwarded as ``options`` to the base class
        """
        # A fresh dict is built on every call, so the default ``options``
        # mapping is never shared between writer instances.
        settings = {
            "filename": kwargs.pop("filename", "data"),
            "extension": kwargs.pop("extension", "hdf5"),
            "description": kwargs.pop(
                "description", "HDF5 file format using h5py"
            ),
            "options": kwargs.pop("options", {"com": "lzf", "comopts": None}),
        }
        super().__init__(
            _hdf5wrapper,
            *args,
            **settings,
            reader=hdf5reader,
            **kwargs,
        )
def _hdf5wrapper(obj, filename, com="lzf", comopts=None):
    """Wrapper to write an object to an HDF5 file.

    Opens ``filename`` in write mode and passes the open file to
    ``_writehdf5``, which recursively walks through the object tree.

    Parameters
    ----------
    obj : object
        the object to be stored in a file
    filename : string
        path to file

    Keywords
    --------
    com : string
        compression method to be used by `h5py`
    comopts : compression_opts
        compression options, see `h5py.File`'s `create_dataset` for details
    """
    with h5py.File(filename, "w") as hdf5file:
        _writehdf5(obj, hdf5file, com=com, comopts=comopts)


def _writehdf5(obj, file, com="lzf", comopts=None, prefix=""):
    """Writes a given object to a h5py file.

    By default all attributes of the object are written out, excluding the
    ones that start with an underscore, the ones listed in ``obj._skiplist``
    and bound methods. Fields with attribute ``Field.save == False`` will be
    skipped. Attributes that are none of number, tuple/list, string, numpy
    array or ``None`` are treated as nested objects and written recursively
    with their name as path prefix.

    Parameters
    ----------
    obj : object
        the object to be stored in a file
    file : hdf5 file
        open hdf5 file object

    Keywords
    --------
    com : string
        compression method to be used by `h5py`
    comopts : compression_opts
        compression options, see `h5py.File`'s `create_dataset` for details
    prefix : str
        a prefix prepended to the name of each attribute when storing with h5py

    Raises
    ------
    ValueError
        if a list or tuple attribute contains ``None``
    NotImplementedError
        if an attribute is a ``dict``
    """
    # The description is only stored once, at the root of the object tree.
    if hasattr(obj, "_description") and obj._description is not None and prefix == "":
        file.create_dataset("description", data=obj._description)

    # Looked up once instead of on every loop iteration.
    skiplist = getattr(obj, "_skiplist", ())

    for key in obj.__dir__():
        # Ignore hidden variables
        if key.startswith("_"):
            continue
        # Skipping items that should not be stored
        if key in skiplist:
            continue

        # Storing the object for easier use later
        val = obj.__getattribute__(key)

        # Skip fields that should not be stored
        if hasattr(val, "save") and not val.save:
            continue
        # Skipping methods
        if isinstance(val, types.MethodType):
            continue

        name = prefix + key

        # Check if numpy.ndarray of strings and convert to list
        if isinstance(val, np.ndarray) and val.dtype.type is np.str_:
            val = val.tolist()

        # Check for number
        if isinstance(val, (numbers.Number, np.number)):
            file.create_dataset(name, data=val)

        # Check for tuple/list. The exact type is compared on purpose, so
        # subclasses are not caught here and fall through to later branches.
        elif type(val) in (tuple, list):
            # Identity test instead of ``None in val``: the membership
            # operator compares with ``==``, which fails with an unrelated
            # error when the sequence holds multi-element numpy arrays.
            if any(item is None for item in val):
                raise ValueError("HDF5 cannot store None values in lists or tuples.")
            # special case for list of strings
            if any(type(item) is str for item in val):
                file.create_dataset(
                    name,
                    data=np.array(val, dtype=object),
                    dtype=h5py.special_dtype(vlen=str),
                    compression=com,
                    compression_opts=comopts,
                )
            else:
                file.create_dataset(
                    name,
                    data=val,
                    compression=com,
                    compression_opts=comopts,
                )

        # Check for string
        elif type(val) is str:
            file.create_dataset(name, data=val)

        # Check for Numpy array
        elif isinstance(val, np.ndarray):
            if val.shape == ():
                # Zero-dimensional arrays are written without compression
                # options.
                file.create_dataset(name, data=val)
            else:
                file.create_dataset(
                    name,
                    data=val,
                    compression=com,
                    compression_opts=comopts,
                )

        # Dicts not implemented, yet
        elif type(val) is dict:
            raise NotImplementedError("Storing dict not yet implemented in hdf5writer.")

        # Check for None: stored as a placeholder 0 that is flagged with a
        # "None" attribute, which the reader checks for.
        elif val is None:
            dset = file.create_dataset(name, data=0)
            dset.attrs["None"] = True

        # Other objects
        else:
            _writehdf5(val, file, com=com, comopts=comopts, prefix=name + "/")


class hdf5reader(Reader):
    """Reader class for the HDF5 writer."""

    def __init__(self, writer):
        """HDF5 reader

        Parameters
        ----------
        writer : Writer
            Writer object to which the reader belongs."""
        super().__init__(writer)

    def output(self, output):
        """Reads a single output file.

        Parameters
        ----------
        output : str or int
            Path to filename to be read or number of output

        Returns
        -------
        data : SimpleNamespace
            Namespace of data in file.

        Raises
        ------
        RuntimeError
            if the file does not exist"""
        # Anything that is not a string is translated into a file name by
        # the writer.
        if not isinstance(output, str):
            output = self._writer._getfilename(output)
        if not os.path.isfile(output):
            raise RuntimeError("File does not exist.")
        with h5py.File(output, "r") as hdf5file:
            return self._readgroup(hdf5file)

    def sequence(self, field):
        """Reading the entire sequence of a specific field.

        Parameters
        ----------
        field : string
            String with location of requested field

        Returns
        -------
        seq : array
            Array with requested values

        Raises
        ------
        RuntimeError
            if no output files are found
        TypeError
            if ``field`` is not a string

        Notes
        -----
        ``field`` is addressing the values just as in the parent frame object.
        E.g. ``"groupA.groupB.fieldC"`` is addressing
        ``Frame.groupA.groupB.fieldC``."""
        files = self.listfiles()
        if not files:
            raise RuntimeError("<datadir> does not exist or is empty.")
        if not isinstance(field, str):
            raise TypeError("<field> has to be of type string.")

        # Attribute access with dots maps onto HDF5 group paths with slashes.
        loc = field.replace(".", "/")

        ret = []
        for f in files:
            with h5py.File(f, "r") as hdf5file:
                # Resolve the dataset once and reuse the handle.
                dset = hdf5file[loc]
                if dset.attrs.get("None", False):
                    value = None
                else:
                    value = np.array(dset[()])
            ret.append(value)
        return np.array(ret)

    def _readgroup(self, gr):
        """Helper function that is recursively called to read all levels of
        the data set.

        Parameters
        ----------
        gr : Group of type h5py.Group
            The h5py group to be read

        Returns
        -------
        data : SimpleNamespace
            Namespace of data"""
        ret = {}
        for key in gr:
            # Resolve every member only once; each lookup opens the object.
            item = gr[key]
            if isinstance(item, h5py.Group):
                ret[key] = self._readgroup(item)
            elif item.attrs.get("None", False):
                # Datasets flagged with the "None" attribute represent None.
                ret[key] = None
            else:
                ret[key] = item[()]
        return SimpleNamespace(**ret)