Source code for sequana.enrichment.gsea

# -*- coding: utf-8 -*-
#
#  This file is part of Sequana software
#
#  Copyright (c) 2016-2020 - Sequana Development Team
#
#  File author(s):
#      Thomas Cokelaer <thomas.cokelaer@pasteur.fr>
#      Etienne Kornobis <etienne.kornobis@pasteur.fr>
#
#  Distributed under the terms of the 3-clause BSD license.
#  The full license is in the LICENSE file, distributed with this software.
#
#  website: https://github.com/sequana/sequana
#  documentation: http://sequana.readthedocs.io
#
##############################################################################

import tempfile

import colorlog
import gseapy

# Module-level logger, named after this module.
logger = colorlog.getLogger(__name__)

# pandas is obtained through sequana's ``lazy`` module rather than imported
# directly.
from sequana.lazy import pandas as pd

# Names exported by ``from sequana.enrichment.gsea import *``.
__all__ = ["GSEA"]


class GSEA:
    """Small wrapper around :func:`gseapy.enrichr`.

    The instance stores the gene sets to test; :meth:`compute_enrichment`
    runs the enrichment for a given gene list and normalises the returned
    results table.
    """

    def __init__(self, gene_sets=None):
        """.. rubric:: **Constructor**

        :param gene_sets: gene sets to be checked for enrichment. The value
            is stored as is and forwarded to :func:`gseapy.enrichr`. When
            omitted, each instance gets its own new empty dictionary.
        """
        # A literal ``{}`` default is evaluated once and would be shared by
        # every instance created without arguments; use None as sentinel.
        if gene_sets is None:
            gene_sets = {}
        #: attribute to store the gene sets to be checked for enrichment
        self.gene_sets = gene_sets
        self.no_plot = True  # Remove description when using v0.13.0 of gseapy API

    def compute_enrichment(self, gene_list, background=None, verbose=False, outdir=None):
        """Run :func:`gseapy.enrichr` on *gene_list* against ``self.gene_sets``.

        :param gene_list: list of genes (e.g. genes with significant fold change)
        :param background: expected background of the species. Should be
            number of genes for the species of interest.
        :param verbose: forwarded to :func:`gseapy.enrichr`.
        :param outdir: directory used to store reports and intermediate
            results. Either a path (``str`` or :class:`os.PathLike`) or an
            object exposing the directory path as ``.name`` (such as
            :class:`tempfile.TemporaryDirectory`). If None, a temporary
            directory is created and removed before returning.
        :return: the object returned by :func:`gseapy.enrichr`. When its
            ``results`` table is not empty, the genes of each row in the
            ``Genes`` column are sorted and a ``size`` column holding the
            number of genes per row is added. If gseapy raises a
            ``ValueError``, a namedtuple with a single ``results`` field
            holding an empty DataFrame (columns ``Genes``, ``size``,
            ``Term``) is returned instead.
        """
        import os
        from collections import namedtuple

        scratch = None
        if outdir is None:
            scratch = tempfile.TemporaryDirectory()
            outdir_path = scratch.name
        elif isinstance(outdir, (str, bytes, os.PathLike)):
            # Plain paths have no usable ``.name`` (str) or a misleading one
            # (pathlib.Path.name is only the last component).
            outdir_path = os.fspath(outdir)
        else:
            # Backward compatibility: TemporaryDirectory-like objects.
            outdir_path = outdir.name

        try:
            enr = gseapy.enrichr(
                gene_list=gene_list,
                gene_sets=self.gene_sets,
                verbose=verbose,
                background=background,
                outdir=outdir_path,
                no_plot=self.no_plot,
            )
        except ValueError:
            # if no hits, newest gseapy version will raise a ValueError
            enrich = namedtuple("MyStruct", "results")
            enr = enrich(results=pd.DataFrame({"Genes": [], "size": [], "Term": []}))
        else:
            if len(enr.results):
                # Split each ';'-separated gene string once, then derive both
                # the sorted string and the number of genes from it.
                per_term = [sorted(genes.split(";")) for genes in enr.results["Genes"].values]
                enr.results["Genes"] = [";".join(genes) for genes in per_term]
                enr.results["size"] = [len(genes) for genes in per_term]
        finally:
            # Only remove the directory we created ourselves.
            if scratch is not None:
                scratch.cleanup()

        return enr