Source code for viscid.readers.numpy_binary

#!/usr/bin/env python
""" simple reader that tries to understand a numpy binary npz file
WARNING: not lazy
Current working assumption is that all fields in an npz file share the same
grid """

# import string
from __future__ import print_function
import os

import numpy as np

from viscid.compat import OrderedDict
from viscid import logger
from viscid.readers import vfile
from viscid import coordinate

class NPZDataWrapper(vfile.DataWrapper):
    """Lazy handle to a single array stored in a numpy ``.npz`` archive.

    The wrapper only remembers the archive path and the key of the array.
    Every data access re-opens the archive with ``np.load``; the only
    things cached on the instance are the array's shape and dtype.
    """
    fname = None  # path of the npz archive
    loc = None  # key of the wrapped array inside the archive

    _shape = None  # filled in by _read_info()
    _dtype = None  # filled in by _read_info()

    def __init__(self, fname, loc):
        """Remember where the array lives; nothing is read here.

        Args:
            fname: path of the npz archive
            loc: key of the array inside the archive
        """
        super(NPZDataWrapper, self).__init__()
        self.fname = fname
        self.loc = loc

    def _read_info(self):
        """Open the archive and cache the array's shape and dtype.

        Raises:
            IOError: re-raised (after logging) if the archive can not be
                opened
        """
        # this takes super long when reading 3 hrs worth of ggcm data
        # over sshfs
        try:
            with np.load(self.fname) as f:
                dset = f[self.loc]
                self._shape = dset.shape
                self._dtype = dset.dtype
        except IOError:
            logger.error("Problem opening npz file, '%s'", self.fname)
            raise

    @property
    def shape(self):
        """ only ask for this if you really need it; can be a speed problem
        for large temporal datasets over sshfs """
        if self._shape is None:
            self._read_info()
        return self._shape

    @property
    def dtype(self):
        """ only ask for this if you really need it; can be a speed problem
        for large temporal datasets over sshfs """
        if self._dtype is None:
            self._read_info()
        return self._dtype

    def wrap_func(self, func_name, *args, **kwargs):
        """Load the array and call one of its methods.

        Args:
            func_name: name of the method to look up on the loaded array
            *args, **kwargs: forwarded to that method

        Returns:
            whatever the array's method returns
        """
        with np.load(self.fname) as f:
            return getattr(f[self.loc], func_name)(*args, **kwargs)

    def __array__(self, *args, **kwargs):
        """Load the array from the archive (numpy array protocol)."""
        return self.wrap_func("__array__", *args, **kwargs)

    def read_direct(self, *args, **kwargs):
        """Not supported for npz archives."""
        raise NotImplementedError()

    def len(self):
        """Return the length of the array's first axis."""
        # numpy arrays have no ``len`` method, so looking up "len" always
        # raised AttributeError; the ``__len__`` dunder is what exists
        return self.wrap_func("__len__")

    def __getitem__(self, item):
        """Load the array and return ``array[item]``."""
        return self.wrap_func("__getitem__", item)
class FileNumpyNPZ(vfile.VFile):
    """Reader / writer for numpy binary ``.npz`` archives.

    All arrays in an archive are put on one grid. The archive layout
    understood by :meth:`_parse` and produced by :meth:`save_fields` is:

      * ``crd_names``: array of coordinate axis names; each axis is
        stored as an array under its own name. If this key is missing,
        whichever of ``x``, ``y``, ``z`` exist are used as coordinates.
      * ``field_names_nc`` / ``field_names_cc`` / ``field_names_fc`` /
        ``field_names_ec``: arrays listing the names of the node / cell /
        face / edge centered fields.
      * any array not accounted for by the above is loaded as a node
        centered field.
    """
    _detector = r".*\.(npz)\s*$"

    # key of the array that lists the coordinate axis names
    _KEY_CRDS = "crd_names"
    # field centering -> key of the array that lists those fields' names
    _KEY_FLDS = {"node": "field_names_nc",
                 "cell": "field_names_cc",
                 "face": "field_names_fc",
                 "edge": "field_names_ec"}

    def __init__(self, fname, **kwargs):
        super(FileNumpyNPZ, self).__init__(fname, **kwargs)

    def _wrap_lazy_field(self, parent_node, file_name, fld_name, crds, center):
        """Make a field backed by an :class:`NPZDataWrapper`.

        The field type is inferred from the array's number of dimensions:
        "Scalar" if it equals ``crds.nr_dims``, "Vector" if it is one
        larger. Note that asking for the shape opens the archive.

        Raises:
            IOError: if the number of dimensions matches neither case
        """
        lazy_arr = NPZDataWrapper(file_name, fld_name)
        ndim = len(lazy_arr.shape)
        if ndim == crds.nr_dims:
            fldtype = "Scalar"
        elif ndim == crds.nr_dims + 1:
            fldtype = "Vector"
        else:
            raise IOError("can't infer field type")
        return self._make_field(parent_node, fldtype, fld_name, crds, lazy_arr,
                                center=center)

    def _parse(self):
        """Build one grid holding the coordinates and all fields of the
        archive, add it to this file, and activate it."""
        g = self._make_grid(self, **self._grid_opts)

        with np.load(self.fname) as f:
            # every key starts out as a candidate field; keys are removed
            # as they turn out to be coordinates or name lists
            fld_names = list(f.keys())

            # try to get crds names from an array of strings called _KEY_CRDS
            # else, assume it's x, y, z and see if that works
            try:
                crd_names = f[self._KEY_CRDS]
                clist = [(ax, f[ax]) for ax in crd_names]
                fld_names.remove(self._KEY_CRDS)
            except KeyError:
                crd_names = [axisname for axisname in "xyz" if axisname in f]
                clist = [(cn, NPZDataWrapper(self.fname, cn))
                         for cn in crd_names]

            crds = coordinate.wrap_crds("nonuniform_cartesian", clist)
            g.set_crds(crds)
            for crd_name, _ in clist:
                # we should be sure by now that the keys exist
                fld_names.remove(crd_name)

            # try to get field names from arrays of nc, cc, ec, fc
            # fields
            for fld_center, names_key in self._KEY_FLDS.items():
                try:
                    names = f[names_key]
                    fld_names.remove(names_key)
                except KeyError:
                    names = []

                for name in names:
                    fld = self._wrap_lazy_field(g, self.fname, name, crds,
                                                fld_center)
                    g.add_field(fld)
                    fld_names.remove(name)

            # load any remaining fields as though they were node centered
            for name in fld_names:
                fld = self._wrap_lazy_field(g, self.fname, name, crds, "Node")
                g.add_field(fld)

            self.add(g)
        self.activate(0)

    def save(self, fname=None, **kwargs):
        """Write this file's fields to an npz archive.

        Args:
            fname: destination; defaults to the name this file was
                opened with
            **kwargs: accepted for interface compatibility, unused
        """
        if fname is None:
            fname = self.fname
        self.save_fields(fname, self.field_dict())

    @classmethod
    def save_fields(cls, fname, flds, **kwargs):
        """Write fields and their coordinates to an npz archive.

        Coordinates are taken from the first field only, i.e. all fields
        are assumed to share the same coordinates.

        Args:
            fname: destination path; environment variables and ``~`` are
                expanded
            flds: dict of ``{name: field}``, a list / tuple of fields
                (saved under ``fld.name``), or a list / tuple of
                ``(name, field)`` pairs
            **kwargs: accepted for interface compatibility, unused

        Raises:
            ValueError: if flds is empty
        """
        # explicit check instead of an assert, which is stripped by -O
        if len(flds) == 0:
            raise ValueError("no fields given to save")

        fname = os.path.expanduser(os.path.expandvars(fname))

        if isinstance(flds, (list, tuple)):
            if isinstance(flds[0], (list, tuple)):
                flds = OrderedDict(flds)
            else:
                flds = OrderedDict([(fld.name, fld) for fld in flds])

        fld_dict = {}

        # setup crds
        # FIXME: all coordinates are saved as non-uniform, the proper
        #        way to do this is to have let coordinate format its own
        #        hdf5 / xdmf / numpy binary output
        fld0 = next(iter(flds.values()))
        clist = fld0.crds.get_clist(full_arrays=True)
        axis_names = []
        for axis_name, crdarr in clist:
            fld_dict[axis_name] = crdarr
            axis_names.append(axis_name)
        fld_dict[cls._KEY_CRDS] = np.array(axis_names)

        # setup fields
        # dict comprehension invalid in Python 2.6
        fld_names = dict((key.lower(), []) for key in cls._KEY_FLDS)
        for name, fld in flds.items():
            fld_names[fld.center.lower()].append(name)
            fld_dict[name] = fld.data

        # one name list per centering, written even when it is empty
        for center, names_key in cls._KEY_FLDS.items():
            fld_dict[names_key] = np.array(fld_names[center.lower()])

        # drop the extension here; np.savez puts ".npz" on the file name
        if fname.endswith(".npz"):
            fname = fname[:-4]
        np.savez(fname, **fld_dict)
## ## EOF ##