# This module should only be imported by modules in 'readers/'. If you
# just want to load files, use vfile_factory instead; routing loads through
# the factory prevents circular imports.
#
# This module provides the base classes for file readers and will
# eventually house the backend for data input.
from __future__ import print_function
from operator import attrgetter
import os
import re
from time import time
from viscid import logger
from viscid.dataset import Dataset, DatasetTemporal
from viscid import grid
from viscid import field
from viscid.compat import string_types
def serialize_subclasses(root, _lst=None):
    """Recursively flatten the subclass tree of root into a list.

    Subclasses appear before their parents (and root comes last), so
    readers further down the class hierarchy take precedence during
    file type detection.
    """
    if _lst is None:
        _lst = list()
    for kls in reversed(root.__subclasses__()):
        serialize_subclasses(kls, _lst=_lst)
    _lst += [root]
    return _lst
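
# A quick sketch of the traversal order (hypothetical toy classes, not part
# of this module): leaves come first and the root comes last, so subclasses
# win ties during detection.
#
#     >>> class A(object): pass
#     >>> class B(A): pass
#     >>> class C(B): pass
#     >>> class D(A): pass
#     >>> [k.__name__ for k in serialize_subclasses(A)]
#     ['D', 'C', 'B', 'A']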
class DataWrapper(object):
    """Lazy proxy for data on disk; readers subclass this to wrap open
    file handles without eagerly reading them"""
    _hypersliceable = False  # can read slices from disk
    _shape = None
    _dtype = None
def __init__(self):
self._shape = None
self._dtype = None
@property
def shape(self):
return self._shape
@property
def dtype(self):
return self._dtype
def __array__(self, *args, **kwargs):
raise NotImplementedError()
    def read_direct(self, *args, **kwargs):
        raise NotImplementedError()
    def len(self):
        raise NotImplementedError()
def __getitem__(self, item):
raise NotImplementedError()
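
# A minimal sketch of a concrete wrapper (hypothetical; assumes an
# h5py-style dataset handle). The point is that no bytes are read from
# disk until the data is actually requested:
#
#     class LazyH5Wrapper(DataWrapper):
#         _hypersliceable = True  # h5py handles can read hyperslabs
#
#         def __init__(self, dset):
#             super(LazyH5Wrapper, self).__init__()
#             self._dset = dset
#             self._shape = dset.shape
#             self._dtype = dset.dtype
#
#         def __array__(self, *args, **kwargs):
#             return self._dset[:]  # full read from disk
#
#         def __getitem__(self, item):
#             return self._dset[item]  # partial (hypersliced) read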
class VFile(Dataset):
    """Generic File

    Note:
        If you want a file that can load other files (like how XDMF
        files need to be able to load HDF5 files), then subclass from
        :py:class:`viscid.readers.vfile_bucket.ContainerFile` instead.

    Note:
        Important when subclassing: do not call the constructors for a
        dataset / grid yourself; dispatch through ``_make_dataset`` and
        ``_make_grid``.
    """
# _detector is a regex string used for file type detection
_detector = None
_priority = 0
    # _gc_warn = True  # appears unused; candidate for removal
_grid_type = grid.Grid
_dataset_type = Dataset
_temporal_dataset_type = DatasetTemporal
_grid_opts = {}
SAVE_ONLY = False
parent_bucket = None
load_time = None
handle_name = None # set in VFileBucket.load_files
fname = None
dirname = None
    # this is for files that stay open after being parsed, for
    # instance an h5py File object
file = None
# grids = None # already part of Dataset
def __init__(self, fname, parent_bucket=None, grid_type=None, grid_opts=None,
**kwargs):
""" """
super(VFile, self).__init__(name=fname, **kwargs)
if grid_type is not None:
self._grid_type = grid_type
if grid_opts is not None:
            self._grid_opts = grid_opts
assert isinstance(self._grid_opts, dict)
self.parent_bucket = parent_bucket
self.load(fname)
    def load(self, fname):
        """Expand fname to an absolute path and parse the file"""
fname = os.path.expanduser(os.path.expandvars(fname))
self.fname = os.path.abspath(fname)
self.dirname = os.path.dirname(self.fname)
self.set_info("_viscid_dirname", self.dirname)
self.load_time = time()
self._parse()
    def reload(self):
self._clear_cache()
self.remove_all_items()
self.load(self.fname)
    def unload(self, **kwargs):
"""Really unload a file, don't just clear the cache"""
self._clear_cache()
self.remove_all_items()
if self.parent_bucket:
self.parent_bucket.remove_reference(self, **kwargs)
def __exit__(self, exc_type, value, traceback):
self.unload()
return None
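
    # Usage sketch (assuming ``__enter__`` is provided further up the
    # class hierarchy, so a VFile works as a context manager; the calls
    # below are illustrative):
    #
    #     with viscid.load_file("run.3d.xdmf") as f:
    #         fld = f.get_field("pp")
    #     # f is unloaded here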
    # saving utility classmethods; overriding save and save_fields should
    # be sufficient for a writer subclass
    def save(self, fname=None, **kwargs):
        """Save an instance of VFile; fname defaults to the name
        of the file as it was read"""
raise NotImplementedError()
    @classmethod
def save_grid(cls, fname, grd, **kwargs):
cls.save_fields(fname, grd.field_dict(), **kwargs)
    @classmethod
def save_field(cls, fname, fld, **kwargs):
cls.save_fields(fname, {kwargs.pop('name', fld.name): fld}, **kwargs)
    @classmethod
    def save_fields(cls, fname, flds, **kwargs):
        """Save a dict of fields using the format given by the class"""
raise NotImplementedError()
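
    # Usage sketch: save_field and save_grid both funnel into save_fields,
    # so a writer subclass (hypothetical FileNPZ below) only needs to
    # implement save_fields:
    #
    #     FileNPZ.save_field("out.npz", fld)           # single field
    #     FileNPZ.save_grid("out.npz", grd)            # all fields in a grid
    #     FileNPZ.save_fields("out.npz", {"pp": fld})  # the real worker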
    def _make_dataset(self, parent_node, dset_type="dataset", name=None,
                      **kwargs):
        """Use this instead of calling Dataset(...) yourself

        Args:
            parent_node (Dataset, Grid, or None): Hint at parent in
                the tree, needed if info is used before this object
                is added to its parent
            dset_type (str, subclass of Dataset, optional): type of
                dataset to create; the strings "dataset" and "temporal"
                map to self._dataset_type and self._temporal_dataset_type
        """
        if isinstance(dset_type, string_types):
            dset_type = dset_type.lower()
            if dset_type == "dataset":
                dset_type = self._dataset_type
            elif dset_type == "temporal":
                dset_type = self._temporal_dataset_type
            else:
                raise ValueError("unknown dataset type: {0}".format(dset_type))
        dset = dset_type(name=name, **kwargs)
if parent_node is not None:
parent_node.prepare_child(dset)
return dset
def _make_grid(self, parent_node, grid_type=None, name=None, **kwargs):
"""Use this instead of calling Grid(...) yourself
Args:
parent_node (Dataset, Grid, or None): Hint at parent in
the tree, needed if info is used before this object
is added to its parent
grid_type (subclass of Grid, optional): if not given, use
self._grid_type
            name (str, optional): name for the new grid
"""
other = dict(self._grid_opts)
other.update(kwargs)
if grid_type is None:
grid_type = self._grid_type
if grid_type is None:
raise TypeError("{0} can't create grids".format(type(self)))
g = grid_type(name=name, **other)
if parent_node is not None:
parent_node.prepare_child(g)
return g
def _make_field(self, parent_node, fldtype, name, crds, data, **kwargs):
"""Use this instead of calling Grid(...) yourself
Args:
parent_node (Dataset, Grid, or None): Hint at parent in
the tree, needed if info is used before this object
is added to its parent
"""
fld = field.wrap_field(data, crds, name=name, fldtype=fldtype, **kwargs)
if parent_node is not None:
parent_node.prepare_child(fld)
return fld
def _parse(self):
# make _parse 'abstract'
raise NotImplementedError("override _parse to read a file")
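
    # A minimal sketch of a subclass _parse (hypothetical reader; names,
    # coords, and data are made up). It builds the tree through the _make_*
    # helpers rather than calling Grid(...) / Field(...) directly:
    #
    #     def _parse(self):
    #         g = self._make_grid(self, name="<MyGrid>")
    #         crds = viscid.arrays2crds([z, y, x])  # assumes arrays z, y, x
    #         fld = self._make_field(g, "scalar", "pp", crds, data)
    #         g.add_field(fld)
    #         self.add(g)
    #         self.activate(0)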
@classmethod
def _detector_func(cls, fname):
return True
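
    # A sketch of overriding _detector_func for content-based detection
    # (hypothetical): when a regex on the file name isn't enough, a reader
    # can peek at the file's magic bytes (shown here for HDF5):
    #
    #     @classmethod
    #     def _detector_func(cls, fname):
    #         try:
    #             with open(fname, 'rb') as fin:
    #                 return fin.read(8) == b'\x89HDF\r\n\x1a\n'
    #         except IOError:
    #             return True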
    @classmethod
    def detect_type(cls, fname, mode='r', prefer=None):
        """Recursively detect a filetype using the _detector regex string.

        This is called recursively for all subclasses, and matches
        further down the class tree are given precedence.

        TODO: move this functionality into a more robust / extendable
            factory class... one that could also take care of the
            bucket / circular reference problem, maybe

        Args:
            fname (str): Filename
            mode (str): 'r' or 'w'
            prefer (str): If multiple file types match, give some part
                of the class name for the reader that you prefer

        Note:
            This only works for classes that have already been
            imported, which makes it a fragile mechanism in that sense.

        Returns:
            VFile subclass: Some reader that matches fname
        """
matched_classes = []
for kls in serialize_subclasses(cls):
if (kls._detector
and re.match(kls._detector, fname)
and kls._detector_func(fname)
):
matched_classes.append(kls)
# sort by reader priority
matched_classes.sort(key=attrgetter('_priority'), reverse=True)
ret = None
if matched_classes:
ret = matched_classes[0]
if prefer:
for kls in reversed(matched_classes):
if prefer.lower() in kls.__name__.lower():
ret = kls
return ret
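
    # Usage sketch: pick a reader class from a file name; `prefer` breaks
    # ties when several readers' _detector regexes match (the class names
    # below are assumptions for illustration):
    #
    #     >>> VFile.detect_type("fields.h5")                 # doctest: +SKIP
    #     <class 'viscid.readers.hdf5.FileLazyHDF5'>
    #     >>> VFile.detect_type("fields.h5", prefer="hdf5")  # doctest: +SKIP
    #     <class 'viscid.readers.hdf5.FileHDF5'>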
    @classmethod
    def resolve_type(cls, ftype):
        """Resolve a human-given file type name to a reader class.

        ftype is normalized (spaces, underscores, dashes, and the word
        'file' are dropped) and matched against similarly normalized
        subclass names, with subclasses taking precedence.
        """
        ftype = ftype.replace(' ', '').replace('_', '').replace('-', '').lower()
_idx = ftype.find('file')
if _idx >= 0:
ftype = ftype[:_idx] + ftype[_idx + len('file'):]
for filetype in reversed(cls.__subclasses__()): # pylint: disable=E1101
td = filetype.resolve_type(ftype)
if td:
return td
cls_name = cls.__name__.lower()
_idx = cls_name.find('file')
if _idx >= 0:
cls_name = cls_name[:_idx] + cls_name[_idx + len('file'):]
if ftype in cls_name:
return cls
return None
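
    # Usage sketch: "XDMF", "xdmf file", and "xdmf-file" all normalize to
    # "xdmf", which would resolve to a class named FileXDMF (shown here as
    # an assumption for illustration):
    #
    #     >>> VFile.resolve_type("XDMF File")  # doctest: +SKIP
    #     <class 'viscid.readers.xdmf.FileXDMF'>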
    @classmethod
    def group_fnames(cls, fnames):
        """Group file names

        The default implementation just returns fnames unchanged, but
        some file types might do something fancy here
Parameters:
fnames (list): names that can be logically grouped, as in
a bunch of file names that are different time steps
of a given run
Returns:
A list of things that can be given to the constructor of
this class
"""
return fnames
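
    # A sketch of a fancier override (hypothetical): a reader whose files
    # look like 'run.3d.004200.xdmf' could group all time steps of a run
    # so they load as one temporal dataset. Assumes collections.OrderedDict
    # is imported:
    #
    #     @classmethod
    #     def group_fnames(cls, fnames):
    #         groups = OrderedDict()
    #         for fname in fnames:
    #             key = re.sub(r'\.\d{6}\.', '.*.', fname)
    #             groups.setdefault(key, []).append(fname)
    #         return list(groups.values())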
    @classmethod
def collective_name_from_group(cls, group):
raise NotImplementedError()
    @classmethod
def collective_name(cls, group):
"""
Parameters:
group: single file name or list of file names that would
be grouped by group_fnames
Returns:
str: a single name
"""
if not isinstance(group, (list, tuple)):
group = [group]
if len(group) > 1:
return cls.collective_name_from_group(group)
else:
return group[0]
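
    # Usage sketch (SomeReader is a hypothetical subclass implementing
    # collective_name_from_group): a lone name passes straight through,
    # while a grouped list is collapsed to one representative name:
    #
    #     >>> SomeReader.collective_name("a.h5")                # doctest: +SKIP
    #     'a.h5'
    #     >>> SomeReader.collective_name(["a.0.h5", "a.1.h5"])  # doctest: +SKIP
    #     'a.*.h5'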
##
## EOF
##