Source code for sequana.viz.volcano

# -*- coding: utf-8 -*-
#
#  This file is part of Sequana software
#
#  Copyright (c) 2016 - Sequana Development Team
#
#  File author(s):
#      Thomas Cokelaer <thomas.cokelaer@pasteur.fr>
#      Etienne Kornobis <etienne.kornobis@pasteur.fr>
#
# This is a copy of the biokit.viz.volcano module (from myself) since
# biokit will not be maintained in the future (merging its content
# into sequana.
#
#
#  Distributed under the terms of the 3-clause BSD license.
#  The full license is in the LICENSE file, distributed with this software.
#
#  website: https://github.com/sequana/sequana
#  documentation: http://sequana.readthedocs.io
#
##############################################################################
"""Volcano plot"""

from adjustText import adjust_text

from sequana.lazy import numpy as np
from sequana.lazy import pandas as pd
from sequana.lazy import pylab as pylab

__all__ = ["Volcano"]


[docs]class Volcano(object): """Volcano plot In essence, just a scatter plot with annotations. .. plot:: :width: 80% :include-source: import numpy as np fc = np.random.randn(1000) pvalue = np.random.randn(1000) import pandas as pd data = pd.DataFrame([fc, pvalue]) data = data.T data.columns = ['log2FoldChange', 'padj'] from sequana.viz import Volcano v = Volcano(data) v.plot() """ def __init__( self, data=None, log2fc_col="log2FoldChange", pvalues_col="padj", annot_col="", color="auto", pvalue_threshold=-np.log10(0.05), log2fc_threshold=1, ): """.. rubric:: constructor :param DataFrame data: Pandas DataFrame with rnadiff results. :param log2fc_col: Name of the column with log2 Fold changes. :param pvalues_col: Name of the column with adjusted pvalues. :param annot_col: Name of the column with genes names for plot annotation. :param color: for color choice :param pvalue_threshold: Adjusted pvalue threshold to use for coloring/annotation. :param log2fc_threshold: Log2 Fold Change threshold to use for coloring/annotation. """ self.color = color self.pvalue_threshold = pvalue_threshold self.log2fc_threshold = log2fc_threshold df = data.copy() df.rename( columns={ log2fc_col: "log2fc", annot_col: "annot", }, inplace=True, ) df["log10pval"] = -np.log10(df[pvalues_col]) self.df = df # self.df = df.loc[df.log10pval.notna()] self._get_colors() def _get_colors(self): """Add colors according to pvalue and fold change thresholds""" def coloring(row): if (row.log2fc <= -self.log2fc_threshold) and (row.log10pval >= self.pvalue_threshold): return "royalblue" if (row.log2fc >= self.log2fc_threshold) and (row.log10pval >= self.pvalue_threshold): return "firebrick" if (abs(row.log2fc) < self.log2fc_threshold) and (row.log10pval >= self.pvalue_threshold): return "black" else: return "lightgrey" if self.color == "auto": self.df["color"] = self.df.apply(coloring, axis=1) else: # pragma: no cover self.df["color"] = self.color
[docs] def annotate(self, **kwargs): texts = [] for row in self.df.itertuples(index=False): if not row.annot: continue if (row.log10pval >= self.pvalue_threshold) and (abs(row.log2fc) >= self.log2fc_threshold): texts.append(pylab.text(row.log2fc, row.log10pval, row.annot)) adjust_text(texts, **kwargs)
[docs] def plot( self, size=10, alpha=0.7, marker="o", fontsize=16, xlabel="fold change", logy=False, threshold_lines={"color": "black", "ls": "--", "width": 0.5}, ylabel="p-value", add_broken_axes=False, broken_axes={"ylims": ((0, 10), (50, 100))}, ): """ :param size: size of the markers :param alpha: transparency of the marker :param fontsize: :param xlabel: :param ylabel: :param center: If centering the x axis """ pylab.clf() if add_broken_axes: # pragma: no cover from brokenaxes import brokenaxes _ylims = broken_axes.get("ylims", None) _xlims = broken_axes.get("xlims", None) bax = brokenaxes(ylims=_ylims, xlims=_xlims) else: bax = pylab bax.scatter( self.df.log2fc, self.df.log10pval, s=size, alpha=alpha, c=self.df.color, marker=marker, edgecolors="None", ) # bax.grid() # pylab.ylim([0, pylab.ylim()[1]]) # M = max(abs(self.log2fc)) * 1.1 # pylab.xlim([-M, M]) try: bax.set_xlabel(xlabel, fontsize=fontsize) bax.set_ylabel(ylabel, fontsize=fontsize) except: bax.xlabel(xlabel, fontsize=fontsize) bax.ylabel(ylabel, fontsize=fontsize) bax.axhline( self.pvalue_threshold, color=threshold_lines["color"], linestyle=threshold_lines["ls"], linewidth=threshold_lines["width"], ) bax.axvline( self.log2fc_threshold, color=threshold_lines["color"], linestyle=threshold_lines["ls"], linewidth=threshold_lines["width"], ) bax.axvline( -1 * self.log2fc_threshold, color=threshold_lines["color"], linestyle=threshold_lines["ls"], linewidth=threshold_lines["width"], ) if logy is True: ax = pylab.gca() ax.set(yscale="log")