# Source code for MidlineIdentifier.plotting

import matplotlib.pyplot as plt

import pandas as pd
import numpy as np
import scanpy as sc
import seaborn as sns



# plot along coords

# [docs]  (documentation-page link marker left over from extraction; not code)
def trend_plot(budoid, features, groupby, coords = 'major_coor_used', save = False, **kwargs):
    """
    Makes a *trend plot* of the expression values of `features` as a function of `coords`

    The expression of every requested gene is fetched together with the
    `coords` and `groupby` columns, reshaped to long format and passed to
    :func:`seaborn.lmplot`: one facet (column) per gene, one hue per `groupby`
    category, `coords` on the x axis and expression on the y axis.

    Unless overridden through `kwargs`, the following :func:`seaborn.lmplot`
    settings are used: ``order=2``, ``line_kws={'lw': 5}``,
    ``x_bins=np.linspace(0, 1, 8)[1:-1]``,
    ``x_estimator=lambda x: np.log(np.mean(x) + 1)`` and ``truncate=True``.
    The default `x_bins` lie strictly between 0 and 1, so they presumably
    expect `coords` to be scaled to that range -- pass your own `x_bins`
    otherwise.

    This function use :func:`seaborn.lmplot`. If you need more flexibility, you should use :func:`seaborn.lmplot` directly.


    Parameters
    ----------
    budoid
        Object whose ``budoid.data.adata`` is the annotated data to plot from
        (it is read through ``.obs.columns``, ``.var_names`` and
        :func:`scanpy.get.obs_df`).
    features : :class:`str` | :class:`list`
        Gene name(s) to plot. Only names found in ``adata.var_names`` are used;
        the others are reported and skipped. The order given here is the order
        of the facets; duplicates are plotted once.
    groupby : :class:`str`
        The key of the observation grouping to consider. Must be one of `obs.columns`
    coords : :class:`str` (default: `'major_coor_used'`)
        Coordinate against which the gene expression is plotted. Must be one of `obs.columns`.
    save : :class:`bool` | :class:`str` (default: `False`)
        If `True`, save the figure as ``'trendplot.pdf'``. If a `str`, it is used
        as the file name; ``'.pdf'`` is appended unless it already ends on
        {`'.pdf'`, `'.png'`, `'.svg'`}. If `False`, nothing is written.
    kwargs
        Additional arguments to pass to :func:`seaborn.lmplot`. They take
        precedence over the defaults listed above.


    Returns
    -------
    The object returned by :func:`seaborn.lmplot`.


    Raises
    ------
    ValueError
        If `groupby` is `None` or not a column of ``adata.obs``, or if none of
        `features` is present in ``adata.var_names``.


    Examples
    --------

    Create a trend plot using the given markers using an example dataset grouped by the category 'batch'.

    .. highlight:: python
    .. code-block:: python

        import PSUils as ps

        budoid1 = ps.io.ReadObj('testdata/Budoid_1A/Budoids.pkl')
        budoid2 = ps.io.ReadObj('testdata/Budoid_3H/Budoids.pkl')
        budoid1.Concat(budoid2)

        markers = ['Col9a2','Col3a1']
        trend_plot(budoid1, markers, groupby='batch')

    """

    adata = budoid.data.adata

    if groupby is None or groupby not in adata.obs.columns:
        raise ValueError(f"groupby = {groupby} must be one of {adata.obs.columns}")

    if isinstance(features, str):
        features = [features]

    # Split the request into known / unknown genes while keeping the caller's
    # order (dict.fromkeys de-duplicates without reordering), so that the facet
    # order is reproducible from run to run.
    known = set(adata.var_names)
    genes = list(dict.fromkeys(f for f in features if f in known))
    missing = list(dict.fromkeys(f for f in features if f not in known))

    if not genes:
        # Nothing to plot: fail here with a clear message instead of letting
        # the plotting call fail on an empty table.
        raise ValueError('None of the requested genes is in the data.')
    if missing:
        print("%s is not in the data, continue without them." % ', '.join(missing))

    kwargs_final = {
        'order': 2,
        'line_kws' :{'lw':5},
        'x_bins' : np.linspace(0, 1, 8)[1:-1],
        'x_estimator': lambda x: np.log(np.mean(x) + 1),
        'truncate' : True
        }

    # User-supplied options win over the defaults above.
    kwargs_final.update(kwargs)

    # Wide table (one column per gene + coords + groupby) -> long table with
    # one row per (cell, gene) so seaborn can facet on the gene name.
    df = sc.get.obs_df(adata, keys = genes + [coords, groupby])
    df = pd.melt(df, id_vars = [coords, groupby], var_name='genes', value_name='exp')

    # `data` is passed by keyword: it is not accepted positionally by every
    # seaborn release.
    lm = sns.lmplot(data = df, x = coords, y = 'exp', hue = groupby, col = 'genes', **kwargs_final)

    # Only write a file when explicitly asked to: `save=False` must not save.
    fn = None
    if isinstance(save, bool):
        if save:
            fn = 'trendplot.pdf'
    elif isinstance(save, str):
        fn = save if save.endswith((".svg", ".pdf", ".png")) else save + '.pdf'

    if fn is not None:
        plt.savefig(fn)

    return lm