Source code for sequana.summary

#
#  This file is part of Sequana software
#
#  Copyright (c) 2016-2022 - Sequana Development Team
#
#  Distributed under the terms of the 3-clause BSD license.
#  The full license is in the LICENSE file, distributed with this software.
#
#  website: https://github.com/sequana/sequana
#  documentation: http://sequana.readthedocs.io
#
##############################################################################
"""simple summary class to handle summary data with metadata"""
import json
import os
import time
from pathlib import Path

import colorlog

from sequana.lazy import pandas as pd

logger = colorlog.getLogger(__name__)


from sequana.utils.datatables_js import DataTable

__all__ = ["Summary"]


class MultiSummary(object):  # pragma: no cover
    """Helper class to read several JSON summaries and build tables/HTML.

    Each summary is stored in :attr:`data` under a label, and :attr:`order`
    remembers the order in which labels were read so that tables are built
    with rows in that same order.
    """

    def __init__(self):
        # Used in sequana_quality_control only
        print("sequana.summary warning: MultiSummary will be removed in 1.0. ")
        self.data = {}
        self.order = []

    def read_summary(self, filename, label=None):
        """Load a JSON summary file and register it under *label*.

        :param filename: path to a JSON file.
        :param label: key under which the content is stored. Defaults to the
            base name of *filename*.
        """
        self.filename = filename
        # Context manager so the file handle is closed deterministically.
        with open(filename, "r") as fin:
            data = json.load(fin)

        if label is None:
            label = Path(filename).name
        self.data[label] = data
        self.order.append(label)

    def remove_summary(self, label):
        """Forget the summary registered under *label* (no-op if unknown)."""
        if label in self.data and label in self.order:
            del self.data[label]
            # Remove by value: ``list.pop`` expects an integer index and would
            # raise TypeError on a label. Every occurrence is dropped so that
            # no stale label is left behind if a label was read more than once.
            self.order[:] = [item for item in self.order if item != label]

    def get_html_table(self, user_key_list):
        """Return the JavaScript and HTML of a table of the requested keys.

        :param user_key_list: keys as accepted by :meth:`get_single_data`.
        :return: the concatenation of the JavaScript function and the HTML
            table produced by the ``DataTable`` helper.
        """
        df = self.get_single_data(user_key_list)
        datatable = DataTable(df, "name")
        datatable.datatable.datatable_options = {
            "pageLength": 15,
            "scrollCollapse": "false",
            "dom": "Brt",
            "buttons": ["copy", "csv"],
        }
        js = datatable.create_javascript_function()
        html = datatable.create_datatable(float_format="%.6g")
        return js + html

    def get_single_data(self, user_key_lists):
        """Extract the requested values from every summary into a dataframe.

        :param user_key_lists: list of keys. Nested values are addressed with
            a ``/`` separated path (e.g. ``"data/mean"``).
        :return: a dataframe with one row per summary (in reading order). The
            labels are in the ``index`` column, followed by one column per
            requested key.
        :raises KeyError: if a path does not exist in one of the summaries.
        """
        labels = []
        rows = []
        for label, summary in self.data.items():
            row = []
            for user_key in user_key_lists:
                # Walk down the nested dictionaries following the path.
                value = summary
                for depth in user_key.split("/"):
                    value = value[depth]
                row.append(value)
            labels.append(label)
            rows.append(row)

        # Passing the columns to the constructor also works when no summary
        # has been read yet (assigning them afterwards fails on an empty frame).
        df = pd.DataFrame(rows, index=labels, columns=list(user_key_lists))
        df = df.loc[self.order]
        df = df.reset_index()  # we need at least one index and one value

        return df


class Summary(object):
    """Container for summary data together with its metadata.

    .. doctest::

        >>> s = Summary("test", "chr1", data={"mean": 1})
        >>> s.name
        'sequana_summary_test'
        >>> s.sample_name
        'chr1'

    Here, we prefix the name with the "sequana_summary" tag. Then, we populate
    the sequana version and date automatically.

    The final summary content is then accessible as a dictionary::

        >>> s.as_dict()
        {'data': {'mean': 1},
         'date': 'Thu Jan 18 22:09:13 2018',
         'name': 'sequana_summary_test',
         'sample_name': 'chr1',
         'version': '0.6.3.post1'}

    You can also populate a description dictionary that will provide a
    description for the keys contained in the *data* field. For instance, here,
    the data dictionary contains only one obvious field (mean), we could
    provide a description::

        s.data_description = {"mean": "a dedicated description for the mean"}

    A more general description can also be provided::

        s.description = "bla bla bla"

    """

    # Tag prepended to the user-provided name by the :attr:`name` property.
    _PREFIX = "sequana_summary_"

    def __init__(self, name, sample_name="undefined", data=None, caller=None, pipeline_version=None):
        """Create a summary, or load one from an existing JSON file.

        :param name: either a short name without spaces, or the path of an
            existing file ending in ``json`` (as written by :meth:`to_json`),
            in which case all other arguments are ignored.
        :param sample_name: name of the sample.
        :param data: dictionary with the summary data (defaults to a new
            empty dictionary).
        :param caller: name of the caller.
        :param pipeline_version: version of the pipeline.
        :raises AssertionError: if *name* is not a single word or *data* is
            not a dictionary.
        :raises KeyError: if a mandatory field is missing from the JSON file.
        """
        # A fresh dictionary per instance: a ``{}`` default would be shared
        # by every Summary created without an explicit *data* argument.
        if data is None:
            data = {}

        if os.path.exists(name) and name.endswith("json"):
            with open(name, "r") as fin:
                content = json.load(fin)

            # The file stores the *prefixed* name (see as_dict). Strip the tag
            # so that the ``name`` property does not prefix it a second time.
            stored_name = content["name"]
            if stored_name.startswith(self._PREFIX):
                stored_name = stored_name[len(self._PREFIX):]

            self._name = stored_name
            self.description = content["description"]
            self.pipeline_version = content.get("pipeline_version", None)
            self._data_description = content["data_description"]
            self.sample_name = content["sample_name"]
            self.data = content["data"]
            self.params = content.get("params", {})
            self.caller = content.get("caller", "undefined")
        else:
            name = name.strip()
            # Raised explicitly (rather than with ``assert``) so that the
            # checks survive ``python -O`` while the exception type callers
            # may rely on stays the same.
            if len(name.split()) != 1:
                raise AssertionError("no space allowed in the name")
            if not isinstance(data, dict):
                raise AssertionError("data must be a dictionary")
            self._name = name
            self.description = ""
            self._data_description = {}
            self.sample_name = sample_name
            self.data = data
            self.caller = caller
            self.pipeline_version = pipeline_version
            self.params = {}

    def as_dict(self):
        """Return the summary (data and metadata) as a dictionary."""
        return {
            "name": self.name,
            "sample_name": self.sample_name,
            "version": self.version,
            "pipeline_version": self.pipeline_version,
            "date": self.date,
            "data": self.data,
            "params": self.params,
            "description": self.description,
            "data_description": self.data_description,
            "caller": self.caller,
        }

    def add_params(self, params):
        """Set the parameters stored in the ``params`` field."""
        self.params = params

    def to_json(self, filename):
        """Write the content of :meth:`as_dict` into *filename* as JSON."""
        with open(filename, "w") as fh:
            json.dump(self.as_dict(), fh, indent=4, sort_keys=True)

    @property
    def date(self):
        """Current local time as returned by :func:`time.asctime`."""
        return time.asctime()

    @property
    def name(self):
        """Name of the summary prefixed with the ``sequana_summary_`` tag."""
        return self._PREFIX + self._name

    @property
    def version(self):
        """Version of the sequana package."""
        # Imported lazily, as in the original implementation.
        from sequana import version

        return version

    @property
    def data_description(self):
        """Description of each key found in *data* (None if not described)."""
        return {key: self._data_description.get(key, None) for key in self.data}

    @data_description.setter
    def data_description(self, desc):
        if not isinstance(desc, dict):
            raise AssertionError("data_description must be a dictionary")
        # Validate everything first and assign at the end, so that a failure
        # does not leave the descriptions half updated.
        validated = {}
        for key, value in desc.items():
            if key not in self.data:
                raise KeyError("{} not a key found in your data dictionary".format(key))
            validated[key] = value
        self._data_description = validated