Source code for viscid.readers.vfile_bucket

#!/usr/bin/env python

from __future__ import print_function
import os

from viscid import logger
from viscid.bucket import Bucket
from viscid.compat import OrderedDict, string_types
from viscid.readers.vfile import VFile
from viscid.vutil import slice_globbed_filenames

class VFileBucket(Bucket):
    """Bucket that manages open files

    Create / get the shared instance with get_file_bucket() since you
    generally only need one instance, but you can construct directly
    if you need more than one manager.
    """

    def __init__(self, **kwargs):
        super(VFileBucket, self).__init__(ordered=True, **kwargs)

    @staticmethod
    def _expand_str_handle(handle):
        """Expand '~' and environment variables in string handles

        Handles that are not strings are returned unchanged.
        """
        if isinstance(handle, string_types):
            return os.path.expanduser(os.path.expandvars(handle))
        return handle

    def load_file(self, fname, index_handle=True, **kwargs):
        """Load a single file and return one VFile instance

        Unlike load_files(), this returns a single file, not a list.

        Parameters:
            fname: file name, forwarded to load_files()
            index_handle: forwarded to load_files()
            kwargs: forwarded to load_files()

        Returns:
            The first file that load_files() returned, or None if it
            returned nothing. A warning is logged if more than one file
            was loaded.
        """
        fls = self.load_files(fname, index_handle=index_handle, **kwargs)
        if not fls:
            return None
        if len(fls) > 1:
            logger.warning("Loaded > 1 file for %s, did you mean to call "
                           "load_files()?", fname)
        return fls[0]

    def load_files(self, fnames, index_handle=True, file_type=None, prefer=None,
                   force_reload=False, _add_ref=False, **kwargs):
        """Load files, and add them to the bucket

        Files whose handle is already in the bucket are reused instead
        of being constructed again. A file whose constructor raises an
        IOError is logged and skipped; it is neither added to the
        bucket nor included in the returned list.

        Parameters:
            fnames: a file name, or a list / tuple of file names (can
                contain glob patterns)
            index_handle: forwarded to set_item() when a file is added
                to the bucket
            file_type: a class that is a subclass of VFile, or a string
                that is resolved using VFile.resolve_type(). If given,
                use this file type, don't use the autodetect mechanism
            prefer: forwarded to VFile.detect_type(); only used when
                file_type is None
            force_reload: if True, call reload() on files that are
                already in the bucket
            _add_ref: forwarded to set_item()
            kwargs: passed to file constructor

        Returns:
            A list of VFile instances. The length may not be the same
            as the length of fnames, and the order may not be the same
            in order to accommodate globs and file grouping.

        Raises:
            RuntimeError: if no file type can be determined for a file
            ValueError: re-raised (after logging) if a file constructor
                raises it
        """
        orig_fnames = fnames

        if not isinstance(fnames, (list, tuple)):
            fnames = [fnames]

        # expand glob patterns; slice_globbed_filenames can give back
        # either a single name or a sequence of names
        globbed_fnames = []
        for fname in fnames:
            slglob = slice_globbed_filenames(fname)
            if isinstance(slglob, string_types):
                slglob = [slglob]
            globbed_fnames += slglob

        # detect file types, keeping the order in which each type was
        # first encountered
        types_detected = OrderedDict()
        for fname in globbed_fnames:
            if file_type is None:
                _ftype = VFile.detect_type(fname, prefer=prefer)
            elif isinstance(file_type, string_types):
                _ftype = VFile.resolve_type(file_type)
            else:
                _ftype = file_type
            if not _ftype:
                raise RuntimeError("Can't determine type "
                                   "for {0}".format(fname))
            types_detected.setdefault(_ftype, []).append(fname)

        # see if the file's already been loaded, or load it, and add it
        # to the bucket and all that good stuff
        file_lst = []
        for ftype, names in types_detected.items():
            # group all file names of a given type, then add each group
            for group in ftype.group_fnames(names):
                handle_name = ftype.collective_name(group)

                try:
                    # NOTE: a KeyError coming out of reload() also ends
                    # up in the handler below, in which case the file is
                    # constructed afresh
                    f = self[handle_name]
                    if force_reload:
                        f.reload()
                except KeyError:
                    try:
                        f = ftype(group, parent_bucket=self, **kwargs)
                        f.handle_name = handle_name
                    except IOError as e:
                        s = (" IOError on file: {0}\n"
                             "              File Type: {1}\n"
                             "              {2}".format(handle_name, ftype,
                                                        str(e)))
                        logger.warning(s)
                        # construction failed, so there is no file object
                        # to store in the bucket or to hand back
                        continue
                    except ValueError as e:
                        # ... why am i explicitly catching ValueErrors?
                        # i'm probably breaking something by re-raising
                        # this exception, but i didn't document what :(
                        s = (" ValueError on file load: {0}\n"
                             "              File Type: {1}\n"
                             "              {2}".format(handle_name, ftype,
                                                        str(e)))
                        logger.warning(s)
                        # re-raise the last exception
                        raise

                self.set_item([handle_name], f, index_handle=index_handle,
                              _add_ref=_add_ref)
                file_lst.append(f)

        if not file_lst:
            logger.warning("No files loaded for '{0}', is the path "
                           "correct?".format(orig_fnames))
        return file_lst

    def remove_item(self, item, do_unload=True):
        """Remove item from the bucket, calling item.unload() first
        unless do_unload is False
        """
        if do_unload:
            item.unload()
        super(VFileBucket, self).remove_item(item)

    def remove_item_by_handle(self, handle, do_unload=True):
        """Look up the item stored under handle and remove it using
        remove_item()
        """
        self.remove_item(self[handle], do_unload=do_unload)

    def remove_all_items(self, do_unload=True):
        """Remove every item from the bucket, calling unload() on each
        one first unless do_unload is False
        """
        if do_unload:
            # iterate over a copy of the values
            for val in list(self.values()):
                try:
                    val.unload()
                except KeyError:
                    # best effort: a KeyError from unload() is ignored
                    # (e.g. ContainerFile.unload() looks its children
                    # up in a bucket by handle) since everything is
                    # removed below anyway
                    pass
        super(VFileBucket, self).remove_all_items()

    def __getitem__(self, handle):
        return super(VFileBucket, self).__getitem__(
            self._expand_str_handle(handle))

    def __contains__(self, handle):
        return super(VFileBucket, self).__contains__(
            self._expand_str_handle(handle))


class ContainerFile(VFile):  # pylint: disable=abstract-method
    """A VFile that loads, and keeps track of, other files

    The use case is always something like the relationship between XDMF
    files and HDF5 files. It's nice for an XDMF file to keep track of
    all the HDF5 Files that it refers to.
    """
    # bucket that child files are loaded into
    child_bucket = None
    # child files, appended the first time each handle is seen
    _child_files = None
    # maps child handle_name -> number of times this file loaded it
    _child_ref_count = None

    def __init__(self, fname, parent_bucket=None, **kwargs):
        # children go into the parent bucket when one is given,
        # otherwise into a bucket created just for this file
        self.child_bucket = (VFileBucket() if parent_bucket is None
                             else parent_bucket)
        self._child_files = []
        self._child_ref_count = {}
        super(ContainerFile, self).__init__(fname, parent_bucket=parent_bucket,
                                            **kwargs)

    def _load_child_file(self, fname, **kwargs):
        """Load fname through self.child_bucket and record the reference
        so the child can be dealt with when this file is unloaded

        Returns whatever child_bucket.load_file() returned, which may
        be None.
        """
        child = self.child_bucket.load_file(fname, _add_ref=True, **kwargs)
        if child is None:
            return child

        handle = child.handle_name
        if handle in self._child_ref_count:
            self._child_ref_count[handle] += 1
        else:
            # first time this child is seen
            self._child_files.append(child)
            self._child_ref_count[handle] = 1
        return child

    def reload(self):
        """Reload every recorded child file, then this file"""
        for handle in self._child_ref_count.keys():
            child = self.child_bucket[handle]
            child.reload()
        super(ContainerFile, self).reload()

    def unload(self, **kwargs):
        """Unload every recorded child file, then this file

        Each child is unloaded with _ref_count set to the number of
        references this file recorded for it. While iterating over the
        children, a RuntimeError is raised if kwargs contains "count"
        or "_ref_count". kwargs are not forwarded to the parent class.
        """
        for handle, n_refs in self._child_ref_count.items():
            if "count" in kwargs or "_ref_count" in kwargs:
                raise RuntimeError()
            self.child_bucket[handle].unload(_ref_count=n_refs)
        super(ContainerFile, self).unload()

    def clear_cache(self):
        """Clear the cache of every recorded child file, then this file"""
        for handle in self._child_ref_count.keys():
            child = self.child_bucket[handle]
            child.clear_cache()
        super(ContainerFile, self).clear_cache()

##
## EOF
##