Source code for viscid.readers.hdf5

from __future__ import print_function
import os

import numpy as np

import viscid
from viscid.compat import OrderedDict
from viscid import logger
from viscid.readers import vfile


try:
    import h5py
except ImportError as e:
    h5py = viscid.UnimportedModule(e)


class H5pyDataWrapper(vfile.DataWrapper):
    """ """
    _hypersliceable = True  # can read slices from disk

    fname = None
    loc = None
    comp_dim = None
    comp_idx = None
    transpose = None

    _shape = None
    _dtype = None

    def __init__(self, fname, loc, comp_dim=None, comp_idx=None,
                 transpose=False):
        super(H5pyDataWrapper, self).__init__()
        self.fname = fname
        self.loc = loc
        self.comp_dim = comp_dim
        self.comp_idx = comp_idx
        self.transpose = transpose

    def _read_info(self):
        try:
            with h5py.File(self.fname, 'r') as f:
                dset = self._resolve_loc(f)
                self._shape = list(dset.shape)
                if self.comp_dim is not None:
                    self._shape.pop(self.comp_dim)
                self._shape = tuple(self._shape)
                self._dtype = dset.dtype
        except IOError:
            logger.error("Problem opening hdf5 file, '%s'", self.fname)
            raise

    @property
    def shape(self):
        """ only ask for this if you really need it; can be a speed
        problem for large temporal datasets over sshfs """
        if self._shape is None:
            self._read_info()
        if self.transpose:
            return self._shape[::-1]
        else:
            return self._shape

    @property
    def dtype(self):
        """ only ask for this if you really need it; can be a speed
        problem for large temporal datasets over sshfs """
        if self._dtype is None:
            self._read_info()
        return self._dtype

    def __len__(self):
        if self.transpose:
            return self.shape[-1]
        else:
            return self.shape[0]

    def __array__(self, *args, **kwargs):
        arr = np.empty(self.shape, dtype=self.dtype)
        self.read_direct(arr)
        return arr

    def _inject_comp_slice(self, slc):
        if self.comp_dim is not None:
            new_slc = []
            if slc is not None:
                try:
                    new_slc = list(slc)
                except TypeError:
                    new_slc = [slc]
                new_slc += [slice(None)] * (len(self.shape) - len(new_slc))
            else:
                new_slc = [slice(None)] * len(self.shape)

            if self.comp_dim < 0:
                self.comp_dim += len(self.shape) + 1
                if self.comp_dim < 0:
                    raise ValueError("comp_dim can't be < -len(self.shape)")
            new_slc.insert(self.comp_dim, self.comp_idx)
            slc = tuple(new_slc)
        return slc
    def read_direct(self, arr, **kwargs):
        source_sel = kwargs.pop("source_sel", None)
        source_sel = self._inject_comp_slice(source_sel)
        with h5py.File(self.fname, 'r') as f:
            fill_arr = arr
            if self.transpose:
                # FIXME: the temp array here isn't pretty, but transposing
                # the array is kind of a hack anyway. it is fixed by the
                # ability for fields to specify their xyz/zyx order in the
                # xyz branch, but that branch isn't fully tested yet
                fill_arr = np.empty(arr.shape[::-1], dtype=arr.dtype)
            self._resolve_loc(f).read_direct(fill_arr, source_sel=source_sel,
                                             **kwargs)
            if self.transpose:
                arr[...] = fill_arr.T
    def __getitem__(self, item):
        item = self._inject_comp_slice(item)
        with h5py.File(self.fname, 'r') as f:
            arr = self._resolve_loc(f)[item]
            if self.transpose:
                return np.transpose(arr)
            else:
                return arr

    def _resolve_loc(self, open_file):
        ret, self.loc = viscid.resolve_path(open_file, self.loc, first=True)
        return ret
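# A minimal usage sketch of H5pyDataWrapper, kept here as an unexecuted
# helper; the file name "fields.h5" and dataset location "/flds_nc/pressure"
# are hypothetical and only illustrate the lazy / hyperslice API above.
def _example_h5py_data_wrapper():
    wrap = H5pyDataWrapper("fields.h5", "/flds_nc/pressure")
    print(wrap.shape, wrap.dtype)  # opens the file briefly to read metadata
    plane = wrap[0, :, :]          # hyperslab read; only this slice hits disk
    full = np.array(wrap)          # __array__ -> read_direct on the full dataset
    return plane, full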
class FileLazyHDF5(vfile.VFile):
    """ This is for lazy wrapping an h5 file referred to by an xdmf file,
    or anything else where you want to have a single file instance for a
    single hdf5 file, but you don't want any parsing because you already
    know what's going to be in the file.
    """
    _detector = None

    def __init__(self, fname, **kwargs):
        super(FileLazyHDF5, self).__init__(fname, **kwargs)

    def _parse(self):
        pass
    def get_data(self, handle):
        return H5pyDataWrapper(self.fname, handle)
    def resolve_path(self, path, first=False):
        with h5py.File(self.fname, 'r') as f:
            return viscid.resolve_path(f, path, first=first)
    def find_items(self, item):
        return self.resolve_path(item)[0]
    def find_item(self, item):
        return self.resolve_path(item, first=True)[0]
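# A minimal usage sketch of FileLazyHDF5, again as an unexecuted helper; the
# file "run.3d.h5" and the dataset path "mhd/rr" are hypothetical. This is the
# lazy wrapper an xdmf reader can use to pull individual datasets without
# parsing the whole hdf5 file up front.
def _example_file_lazy_hdf5():
    f5 = FileLazyHDF5("run.3d.h5")
    wrap = f5.get_data("mhd/rr")  # returns an H5pyDataWrapper; no data read yet
    return np.array(wrap)         # actually reads the dataset from disk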
class FileHDF5(vfile.VFile):
    """ this is an abstract-ish class from which other types of hdf5
    readers should derive """
    _detector = r".*\.h5\s*$"

    SAVE_ONLY = True

    _CRDS_GROUP = "/crds"
    _FLD_GROUPS = {"node": "/flds_nc",
                   "cell": "/flds_cc",
                   "face": "/flds_fc",
                   "edge": "/flds_ec"}

    _XDMF_TEMPLATE_BEGIN = """<?xml version='1.0' ?>
<Xdmf xmlns:xi='http://www.w3.org/2001/XInclude' Version='2.0'>
<Domain>
<Grid GridType="Collection" CollectionType="Spatial">
  <Time Type="Single" Value="{time}" />
"""
    _XDMF_TEMPLATE_RECTILINEAR_GRID_BEGIN = """
<Grid Name="{grid_name}" GridType="Uniform">
  <Topology TopologyType="3DRectMesh" Dimensions="{crd_dims}"/>
  <Geometry GeometryType="VXVYVZ">
    <DataItem Name="VX" DataType="Float" Dimensions="{xdim}" Format="HDF">
      {h5fname}:{xloc}
    </DataItem>
    <DataItem Name="VY" DataType="Float" Dimensions="{ydim}" Format="HDF">
      {h5fname}:{yloc}
    </DataItem>
    <DataItem Name="VZ" DataType="Float" Dimensions="{zdim}" Format="HDF">
      {h5fname}:{zloc}
    </DataItem>
  </Geometry>
"""
    _XDMF_TEMPLATE_ATTRIBUTE = """
  <Attribute Name="{fld_name}" AttributeType="{fld_type}" Center="{center}">
    <DataItem Dimensions="{fld_dims}" NumberType="{dtype}"
              Precision="{precision}" Format="HDF">
      {h5fname}:{fld_loc}
    </DataItem>
  </Attribute>
"""
    _XDMF_INFO_TEMPLATE = '<Information Name="{name}" Value="{value}" />\n'
    _XDMF_TEMPLATE_GRID_END = """
</Grid>
"""
    _XDMF_TEMPLATE_END = """</Grid>
</Domain>
</Xdmf>
"""

    def __init__(self, fname, **kwargs):
        super(FileHDF5, self).__init__(fname, **kwargs)

    def _parse(self):
        raise NotImplementedError("Please load using the xdmf file")
    def save(self, fname=None, **kwargs):
        """ save an instance of VFile, fname defaults to the name of the
        file object as read """
        if fname is None:
            fname = self.fname
        self.save_fields(fname, self.field_dict())
    @classmethod
    def save_fields(cls, fname, flds, complevel=0, compression='gzip',
                    compression_opts=None, **kwargs):
        """ save some fields using the format given by the class """
        # FIXME: this is only good for writing cartesian rectilinear flds
        # FIXME: axes are renamed if flds[0] is 1D or 2D
        assert len(flds) > 0
        fname = os.path.expanduser(os.path.expandvars(fname))

        if complevel and compression == 'gzip' and compression_opts is None:
            compression_opts = complevel
        # TODO: what if compression != 'gzip'
        do_compression = compression_opts is not None

        if isinstance(flds, list):
            if isinstance(flds[0], (list, tuple)):
                flds = OrderedDict(flds)
            else:
                flds = OrderedDict([(fld.name, fld) for fld in flds])

        # FIXME: all coordinates are saved as non-uniform; the proper
        # way to do this is to let the coordinates format their own
        # hdf5 / xdmf / numpy binary output
        fld0 = next(iter(flds.values()))
        clist = fld0.crds.get_clist(full_arrays=True)
        crd_arrs = [np.array([0.0])] * 3
        crd_names = ["x", "y", "z"]
        for i, c in enumerate(clist):
            crd_arrs[i] = c[1]
        crd_shape = [len(arr) for arr in crd_arrs]
        time = fld0.time

        # write arrays to the hdf5 file
        with h5py.File(fname, 'w') as f:
            for axis_name, arr in zip(crd_names, crd_arrs):
                loc = cls._CRDS_GROUP + '/' + axis_name
                if do_compression:
                    f.create_dataset(loc, data=arr, compression=compression,
                                     compression_opts=compression_opts)
                else:
                    f[loc] = arr

            for name, fld in flds.items():
                loc = cls._FLD_GROUPS[fld.center.lower()] + '/' + name
                # xdmf files use kji ordering
                if do_compression:
                    f.create_dataset(loc, data=fld.data.T,
                                     compression=compression,
                                     compression_opts=compression_opts)
                else:
                    f[loc] = fld.data.T

            # big bad openggcm time_str hack to put basetime into hdf5 file
            for fld in flds.values():
                try:
                    tfmt = "%Y:%m:%d:%H:%M:%S.%f"
                    sec_td = viscid.as_timedelta64(fld.time, 's')
                    dtime = viscid.as_datetime(fld.basetime + sec_td).strftime(tfmt)
                    epoch = viscid.readers.openggcm.GGCM_EPOCH
                    ts = viscid.as_timedelta(fld.basetime - epoch).total_seconds()
                    ts += fld.time
                    timestr = "time= {0} {1:.16e} {2} 300c".format(fld.time, ts, dtime)
                    f.create_group('openggcm')
                    f['openggcm'].attrs['time_str'] = np.string_(timestr)
                    break
                except viscid.NoBasetimeError:
                    pass

        # now write an xdmf file
        xdmf_fname = os.path.splitext(fname)[0] + ".xdmf"
        relh5fname = "./" + os.path.basename(fname)
        with open(xdmf_fname, 'w') as f:
            xloc = cls._CRDS_GROUP + '/' + crd_names[0]
            yloc = cls._CRDS_GROUP + '/' + crd_names[1]
            zloc = cls._CRDS_GROUP + '/' + crd_names[2]
            dim_str = " ".join([str(l) for l in crd_shape][::-1])
            f.write(cls._XDMF_TEMPLATE_BEGIN.format(time=time))
            s = cls._XDMF_TEMPLATE_RECTILINEAR_GRID_BEGIN.format(
                grid_name="vgrid", crd_dims=dim_str, h5fname=relh5fname,
                xdim=crd_shape[0], ydim=crd_shape[1], zdim=crd_shape[2],
                xloc=xloc, yloc=yloc, zloc=zloc)
            f.write(s)

            for fld in flds.values():
                _crd_system = viscid.as_crd_system(fld, None)
                if _crd_system:
                    f.write(cls._XDMF_INFO_TEMPLATE.format(name="crd_system",
                                                           value=_crd_system))
                    break

            for name, fld in flds.items():
                fld = fld.as_flat().T
                dt = fld.dtype.name.rstrip("0123456789").title()
                precision = fld.dtype.itemsize
                fld_dim_str = " ".join([str(l) for l in fld.shape])
                loc = cls._FLD_GROUPS[fld.center.lower()] + '/' + name
                s = cls._XDMF_TEMPLATE_ATTRIBUTE.format(
                    fld_name=name, fld_type=fld.fldtype,
                    center=fld.center.title(), dtype=dt, precision=precision,
                    fld_dims=fld_dim_str, h5fname=relh5fname, fld_loc=loc)
                f.write(s)
            f.write(cls._XDMF_TEMPLATE_GRID_END)
            f.write(cls._XDMF_TEMPLATE_END)
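# A minimal usage sketch of FileHDF5.save_fields as an unexecuted helper.
# Hedged: the field is synthetic, "example.h5" is an arbitrary output name,
# and viscid.zeros is assumed to be the top-level field constructor; adapt
# as needed. Saving writes the datasets into example.h5 plus a matching
# example.xdmf that points back at them.
def _example_save_fields():
    x = np.linspace(-1.0, 1.0, 16)
    y = np.linspace(-1.0, 1.0, 24)
    z = np.linspace(-1.0, 1.0, 32)
    fld = viscid.zeros([x, y, z], name="rr", center="cell")  # assumed constructor
    FileHDF5.save_fields("example.h5", [fld])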
##
## EOF
##