# Source code for sequana.summary

#
#  This file is part of Sequana software
#
#  Copyright (c) 2016-2022 - Sequana Development Team
#
#  Distributed under the terms of the 3-clause BSD license.
#  The full license is in the LICENSE file, distributed with this software.
#
#  website: https://github.com/sequana/sequana
#  documentation: http://sequana.readthedocs.io
#
##############################################################################
"""simple summary class to handle summary data with metadata"""
import json
import os
import time
from pathlib import Path

import colorlog

from sequana.lazy import pandas as pd

logger = colorlog.getLogger(__name__)


from sequana.utils.datatables_js import DataTable

__all__ = ["Summary"]


class MultiSummary(object):  # pragma: no cover
    """Helper class to read several JSON summaries and build a table from them.

    Each summary is stored in :attr:`data` under a label, and :attr:`order`
    remembers the order in which labels were added so that the resulting
    table rows follow the insertion order.

    .. note:: deprecated; a warning is printed on construction.
    """

    def __init__(self):
        # Used in sequana_quality_control only
        print("sequana.summary warning: MultiSummary will be removed in 1.0. ")
        # label -> parsed JSON content
        self.data = {}
        # labels in insertion order (drives the row order of the tables)
        self.order = []

    def read_summary(self, filename, label=None):
        """Load a JSON file and register its content under *label*.

        :param filename: path to a JSON file.
        :param label: key used to store the content; defaults to the base
            name of *filename*.
        """
        self.filename = filename
        # Use a context manager so that the file handle is always closed.
        with open(self.filename, "r") as fin:
            data = json.load(fin)

        if label is None:
            label = Path(filename).name
        self.data[label] = data
        self.order.append(label)

    def remove_summary(self, label):
        """Forget the summary stored under *label* (no-op if unknown).

        :param label: the label given to (or derived by) :meth:`read_summary`.
        """
        if label in self.data and label in self.order:
            del self.data[label]
            # list.pop() expects an integer index, so labels must be removed
            # by value. Drop every occurrence in case the same label was
            # read more than once.
            self.order = [name for name in self.order if name != label]

    def get_html_table(self, user_key_list):
        """Return the javascript + HTML code of a table for the given keys.

        :param user_key_list: list of "/"-separated key paths, see
            :meth:`get_single_data`.
        :return: the concatenation of the javascript function and the HTML
            table produced by :class:`DataTable`.
        """
        df = self.get_single_data(user_key_list)
        datatable = DataTable(df, "name")
        datatable.datatable.datatable_options = {
            "pageLength": 15,
            "scrollCollapse": "false",
            "dom": "Brt",
            "buttons": ["copy", "csv"],
        }
        js = datatable.create_javascript_function()
        html = datatable.create_datatable(float_format="%.6g")
        return js + html

    def get_single_data(self, user_key_lists):
        """Extract the requested values from every summary into a dataframe.

        :param user_key_lists: list of key paths. Each path uses "/" to
            descend into nested dictionaries (e.g. ``"data/mean"``).
        :return: a dataframe with one row per summary (in insertion order).
            The first column (``index``) holds the labels; the remaining
            columns are named after the requested key paths.
        :raises KeyError: if a path does not exist in one of the summaries.
        """
        # first get the requested data
        rows = {}
        for label, summary in self.data.items():
            values = []
            for user_key in user_key_lists:
                value = summary
                for depth in user_key.split("/"):
                    value = value[depth]
                values.append(value)
            rows[label] = values

        # Passing the columns to the constructor (rather than assigning them
        # afterwards) also works when no summary has been read yet.
        df = pd.DataFrame(
            list(rows.values()),
            index=list(rows.keys()),
            columns=list(user_key_lists),
        )
        df = df.loc[self.order]
        df = df.reset_index()  # we need at least one index and one value

        return df



# [docs]
class Summary(object):
    """Simple container for summary data together with some metadata.

    .. doctest::

        >>> s = Summary("test", "chr1", data={"mean": 1})
        >>> s.name
        'sequana_summary_test'
        >>> s.sample_name
        'chr1'


    Here, we prefix the name with the "sequana_summary" tag. Then,
    we populate the sequana version and date automatically. The final
    summary content is then accessible as a dictionary (the values of
    *date* and *version* below are only illustrative)::

        >>> s.as_dict()
        {'name': 'sequana_summary_test',
         'sample_name': 'chr1',
         'version': '0.6.3.post1',
         'pipeline_version': None,
         'date': 'Thu Jan 18 22:09:13 2018',
         'data': {'mean': 1},
         'params': {},
         'description': '',
         'data_description': {'mean': None},
         'caller': None}

    You can also populate a description dictionary that will provide a
    description for the keys contained in the *data* field. For instance,
    here, the data dictionary contains only one obvious field (mean), we could
    provide a description::

        s.data_description = {"mean": "a dedicated description for the mean"}

    A more general description can also be provided::

        s.description = "bla bla bla"

    A summary saved with :meth:`to_json` can be read back by giving the
    path of the JSON file as the *name* argument.
    """

    # Tag prepended to the user-provided name, see the `name` property.
    _PREFIX = "sequana_summary_"

    def __init__(self, name, sample_name="undefined", data=None, caller=None, pipeline_version=None):
        """Create a new summary or load one from an existing JSON file.

        :param name: either a short name without spaces, or the path of an
            existing file ending in "json" (as written by :meth:`to_json`).
            In the latter case all other arguments are ignored.
        :param sample_name: name of the sample.
        :param data: dictionary holding the summary data. Defaults to a new
            empty dictionary.
        :param caller: free-form identifier of the caller.
        :param pipeline_version: version of the calling pipeline, if any.
        :raises AssertionError: if *name* contains spaces or *data* is not a
            dictionary.
        :raises KeyError: if a JSON file lacks one of the mandatory fields.
        """
        if os.path.exists(name) and name.endswith("json"):
            with open(name, "r") as fin:
                content = json.loads(fin.read())

            stored_name = content["name"]
            # as_dict() stores the already prefixed name. Strip the tag so
            # that a save/load round trip does not prefix the name twice.
            if stored_name.startswith(self._PREFIX):
                stored_name = stored_name[len(self._PREFIX):]
            self._name = stored_name
            self.description = content["description"]
            self.pipeline_version = content.get("pipeline_version", None)
            self._data_description = content["data_description"]
            self.sample_name = content["sample_name"]
            self.data = content["data"]
            # params and caller are read with a fallback value, so files
            # that do not contain them can still be loaded.
            self.params = content.get("params", {})
            self.caller = content.get("caller", "undefined")
        else:
            # A fresh dictionary per instance: a mutable default argument
            # would be shared by all instances created without *data*.
            if data is None:
                data = {}
            name = name.strip()
            assert len(name.split()) == 1, "no space allowed in the name"
            assert isinstance(data, dict), "data must be a dictionary"
            self._name = name
            self.description = ""
            self._data_description = {}
            self.sample_name = sample_name
            self.data = data
            self.caller = caller
            self.pipeline_version = pipeline_version
            self.params = {}

    def as_dict(self):
        """Return the summary (data and metadata) as a dictionary.

        The *date* and *version* entries are computed when this method is
        called; they are not read back from a loaded JSON file.
        """
        return {
            "name": self.name,
            "sample_name": self.sample_name,
            "version": self.version,
            "pipeline_version": self.pipeline_version,
            "date": self.date,
            "data": self.data,
            "params": self.params,
            "description": self.description,
            "data_description": self.data_description,
            "caller": self.caller,
        }

    def add_params(self, params):
        """Replace the stored parameters with *params*."""
        self.params = params

    def to_json(self, filename):
        """Save the content of :meth:`as_dict` into *filename* as JSON.

        Keys are sorted and the output is indented with 4 spaces.
        """
        with open(filename, "w") as fh:
            json.dump(self.as_dict(), fh, indent=4, sort_keys=True)

    @property
    def date(self):
        """Current local time as returned by :func:`time.asctime`."""
        return time.asctime()

    @property
    def name(self):
        """Name of the summary, prefixed with the "sequana_summary_" tag."""
        return self._PREFIX + self._name

    @property
    def version(self):
        """Version of the installed sequana package."""
        # imported on access rather than at module level
        # (presumably to avoid a circular import -- TODO confirm)
        from sequana import version

        return version

    @property
    def data_description(self):
        """Description of each key of :attr:`data` (None if not provided)."""
        return {key: self._data_description.get(key, None) for key in self.data}

    @data_description.setter
    def data_description(self, desc):
        """Set the descriptions; every key must exist in :attr:`data`.

        :raises AssertionError: if *desc* is not a dictionary.
        :raises KeyError: if a key of *desc* is not a key of :attr:`data`.
        """
        assert isinstance(desc, dict), "data_description must be a dictionary"
        # Validate everything before touching the current descriptions so
        # that a failure does not leave them cleared or half-updated.
        for key in desc:
            if key not in self.data:
                raise KeyError("{} not a key found in your data dictionary".format(key))
        self._data_description = dict(desc)