Source code for sequana.datatools

#
#  This file is part of Sequana software
#
#  Copyright (c) 2016-2021 - Sequana Development Team
#
#  Distributed under the terms of the 3-clause BSD license.
#  The full license is in the LICENSE file, distributed with this software.
#
#  website: https://github.com/sequana/sequana
#  documentation: http://sequana.readthedocs.io
#
##############################################################################
"""Retrieve data from sequana library"""
import collections
import glob
import inspect
import os

import colorlog

# Module-level logger, named after this module and obtained through colorlog.
logger = colorlog.getLogger(__name__)


def _list_resource_names(sharedir, directory):
    """Return the sorted base names of the resource files in *directory*.

    :param str sharedir: path of the ``resources`` directory of the package.
    :param str directory: name of a sub-directory of *sharedir*.
    :return: sorted list of file names (not paths). ``__init__.py``,
        ``__pycache__`` and ``*.pyc`` entries are skipped. The list is empty
        if the directory does not exist or holds no other file.
    """
    ignored = {"__init__.py", "__pycache__"}
    # Escape the installation path so that glob metacharacters it may contain
    # are matched literally; the sub-directory is interpolated unchanged.
    pattern = glob.escape(sharedir) + "/%s/*" % directory
    names = (os.path.basename(path) for path in glob.glob(pattern))
    return sorted(name for name in names if not name.endswith(".pyc") and name not in ignored)


def sequana_data(filename=None, where=None):
    """Return the full path of a sequana resource data file.

    :param str filename: name of the file to be found. Set it to ``"*"`` to
        list the available files instead. If left to ``None``, the available
        files are printed and an error is raised.
    :param str where: one of the registered data directories (see below).
        If set, only that directory is searched.
    :return: the path of the file. See also here below for the case where
        filename is set to ``"*"``.
    :raises ValueError: if *filename* is ``None``; the list of available
        files of every registered directory is printed beforehand (*where*
        is ignored in that case).
    :raises FileNotFoundError: if the file cannot be found.

    .. code-block:: python

        from sequana import sequana_data
        filename = sequana_data("test.fastq")

    Call the function with the ``"*"`` parameter to get the available files.
    With the *where* argument set, the function returns a sorted list of file
    names. Without the *where* argument, a dictionary is returned where keys
    correspond to the registered directories that contain files::

        filenames = sequana_data("*", where="images")

    Registered directories are:

    - data
    - testing
    - examples
    - images
    - scripts
    - doc

    Without *where*, the registered directories are searched in the order
    above and the first match is returned.

    .. note:: this does not handle wildcards. The * means retrieve all files.

    """
    from pathlib import Path

    # Imported here rather than at module level (presumably to avoid a
    # circular import, since this module belongs to the sequana package).
    # It is only needed to locate the installed package on disk.
    import sequana

    sharedir = str(Path(inspect.getfile(sequana)).parent / "resources")
    directories = ["data", "testing", "examples", "images", "scripts", "doc"]

    # Listing mode: report what is available instead of resolving one file.
    if filename == "*":
        if where is not None:
            return _list_resource_names(sharedir, where)
        found = collections.defaultdict(list)
        for thisdir in directories:
            names = _list_resource_names(sharedir, thisdir)
            if names:  # only directories that hold files become keys
                found[thisdir] = names
        return found

    # Help mode: show the valid calls, then refuse to guess a file.
    if filename is None:
        for thisdir in directories:
            print("From %s directory:" % thisdir)
            for name in _list_resource_names(sharedir, thisdir):
                print(' - sequana_data("%s", "%s")' % (name, thisdir))
        raise ValueError("Choose a valid file from the list above")

    # Explicit directory: look nowhere else.
    if where:
        candidate = os.sep.join([sharedir, where, filename])
        if os.path.exists(candidate):
            return candidate
        raise FileNotFoundError(f"unknown file {candidate}. Type sequana_data() to get a list of valid names")

    # No directory given: introspect the registered ones, first match wins.
    for thisdir in directories:
        candidate = os.sep.join([sharedir, thisdir, filename])
        if os.path.exists(candidate):
            return candidate
    raise FileNotFoundError(f"unknown file {filename}. Type sequana_data() to get a list of valid names")