Source code for tissue_purifier.plots.plot_misc

import matplotlib
from matplotlib import pyplot as plt
from typing import Tuple, Any, List, Union
import numpy
import torch
import pandas
import seaborn


[docs]def plot_cdf_pdf( pdf_y: Union[numpy.ndarray, torch.Tensor] = None, cdf_y: Union[numpy.ndarray, torch.Tensor] = None, x_label: str = None, sup_title: str = None) -> plt.Figure: """ Plot the Probability Density Function (pdf) and Cumulative Density Function (cdf). Args: pdf_y: array like data cdf_y: Optional. The cdf to plot. If not present it can be computed internally from pdf_y x_label: the label of the x-axis sup_title: the title for both panels Returns: fig: A two panel figure with the pdf and cdf. """ assert cdf_y is None or len(cdf_y.shape) == 1, "cdf_y must be None or 1D array. Received {0}".format(cdf_y.shape) assert pdf_y is None or len(pdf_y.shape) == 1, "pdf_y must be None or 1D array. Received {0}".format(pdf_y.shape) if cdf_y is not None and pdf_y is None: pdf_y = cdf_y.clone() for i in range(1, len(cdf_y)): pdf_y[i] = cdf_y[i] - cdf_y[i - 1] pdf_y[0] = cdf_y[0] elif cdf_y is None and pdf_y is not None: cdf_y = numpy.cumsum(pdf_y, axis=0) cdf_y /= cdf_y[-1] fig, axes = plt.subplots(ncols=2, figsize=(4 * 2, 4)) _ = axes[0].plot(pdf_y, '.') _ = axes[0].set_ylabel("pdf") _ = axes[1].plot(cdf_y, '.') _ = axes[1].set_ylabel("cdf") if x_label: _ = axes[0].set_xlabel(x_label) _ = axes[1].set_xlabel(x_label) if sup_title: fig.suptitle(sup_title) # fig.tight_layout() plt.close(fig) return fig
def _plot_multigroup_bars( ax: "matplotlib.axes.Axes", y_values: Union[torch.Tensor, numpy.ndarray], y_errors: Union[torch.Tensor, numpy.ndarray] = None, x_labels: List[Any] = None, group_labels: List[Any] = None, title: str = None, group_legend: bool = None, y_lim: Tuple[float, float] = None) -> plt.Figure: """ Make a bar plot of a tensor of shape (groups, x_locs). Each x_loc will have n_groups bars shown next to each other. Args: ax: the current axes to draw the the bars y_values: tensor of shape: (groups, x_locs) with the means y_errors: tensor of shape: (groups, x_locs) with the stds (optional) x_labels: List[str] of length N_types group_labels: List[str] of length N_groups title: string. The title of the plot group_legend: bool. If true show the group legend. y_lim: Tuple[float, float] specifies the extension of the y_axis. For example y_lim = (0.0, 1.0) """ assert y_errors is None or y_errors.shape == y_values.shape if len(y_values.shape) == 1: n_groups = 1 n_values = y_values.shape[0] # add singleton dimension y_values = y_values[None, :] y_errors = None if y_errors is None else y_errors[None, :] elif len(y_values.shape) == 2: n_groups, n_values = y_values.shape else: raise Exception("y_values must be a 1D or 2D array (if multiple groups). Received {0}.".format(y_values.shape)) assert x_labels is None or (isinstance(x_labels, list) and len(x_labels) == n_values) assert group_labels is None or (isinstance(group_labels, list) and len(group_labels) == n_groups) X_axis = numpy.arange(n_values) width = 0.9 / n_groups for n in range(n_groups): group_label = None if group_labels is None else group_labels[n] _ = ax.bar(X_axis + n * width, y_values[n], width, label=group_label) if y_errors: _ = ax.errorbar(X_axis + n * width, y_values[n], yerr=y_errors[n], fmt="o", color="r") show_legend = (group_legend is None and group_labels is not None) or group_legend if show_legend: ax.legend() if x_labels: ax.set_xticks(X_axis + 0.45) ax.set_xticklabels(x_labels, rotation=90) else: ax.set_xticks(X_axis + 0.45) if y_lim: ax.set_ylim(y_lim) if title: ax.set_title(title)
[docs]def plot_clusters_annotations( input_dictionary: dict, cluster_key: str, annotation_keys: List[str], titles: List[str] = None, sup_title: str = None, n_col: int = 3, figsize: Tuple[float, float] = None) -> plt.Figure: """ ADD DOC STRING """ def _preprocess_to_numpy(_y) -> numpy.ndarray: if isinstance(_y, torch.Tensor): return _y.cpu().detach().numpy() elif isinstance(_y, list): return numpy.array(_y) elif isinstance(_y, numpy.ndarray): return _y else: raise Exception( "Labels is either None or torch.Tensor, List, numpy.array. Received {0}".format(type(_y))) def _is_continuous(_y) -> bool: is_float = isinstance(_y[0].item(), float) lot_of_values = len(numpy.unique(_y)) > 20 return is_float * lot_of_values assert isinstance(n_col, int) and n_col >= 1, "n_col must be an integer >= 1. Received {0}".format(n_col) assert isinstance(annotation_keys, list) and set(annotation_keys).issubset(set(input_dictionary.keys())), \ "Error. Annotation_keys must be a list of keys all of which are present in the input dictionary." assert isinstance(cluster_key, str) and cluster_key in input_dictionary.keys(), \ "Error. Cluster_key is not present in the input dictionary." assert titles is None or (isinstance(titles, list) and len(titles) == len(annotation_keys)), \ "Tiles is either None or a list of length len(annotation_keys) = {0}".format(len(annotation_keys)) assert sup_title is None or isinstance(sup_title, str), \ "Sup_tile is either None or a string. Received {0}".format(sup_title) n_max = len(annotation_keys) n_col = min(n_col, n_max) n_row = int(numpy.ceil(float(n_max) / n_col)) figsize = (4 * n_col, 4 * n_row) if figsize is None else figsize fig, axes = plt.subplots(ncols=n_col, nrows=n_row, figsize=figsize) cluster_labels_np = _preprocess_to_numpy(input_dictionary[cluster_key]) unique_cluster_labels = numpy.unique(cluster_labels_np) for n, annotation_k in enumerate(annotation_keys): title = None if titles is None else titles[n] if n_col == 1 and n_row == 1: ax_curr = axes elif n_row == 1: ax_curr = axes[n] else: c = n % n_col r = n // n_col ax_curr = axes[r, c] annotation_tmp = input_dictionary[annotation_k] annotation_np = _preprocess_to_numpy(annotation_tmp) if _is_continuous(annotation_np): # make violin plots df_tmp = pandas.DataFrame.from_dict({'clusters': cluster_labels_np, annotation_k: annotation_np}) _ = seaborn.violinplot(x='clusters', y=annotation_k, data=df_tmp, ax=ax_curr) else: # make a multi bar-chart. I need counts of shape (n_clusters, n_unique_annotations) unique_annotations = numpy.unique(annotation_np) # shape: na counts = numpy.zeros((len(unique_cluster_labels), len(unique_annotations)), dtype=int) for n1, l_cluster in enumerate(unique_cluster_labels): mask_cluster = (cluster_labels_np == l_cluster) for n2, l_annotation in enumerate(unique_annotations): mask_annotation = (annotation_np == l_annotation) counts[n1, n2] = (mask_cluster * mask_annotation).sum() _ = _plot_multigroup_bars(ax=ax_curr, y_values=counts, x_labels=unique_annotations.tolist(), group_labels=unique_cluster_labels.tolist(), group_legend=False) ax_curr.set_title(title) if sup_title: fig.suptitle(sup_title) fig.tight_layout() plt.close(fig) return fig
[docs]def plot_multiple_barplots( data: "pandas.DataFrame", x: str, ys: List[str], n_col: int = 4, figsize: Tuple[float, float] = None, y_labels: List[str] = None, x_labels_rotation: int = 90, x_labels: List[str] = None, titles: List[str] = None, y_lims: Tuple[float, float] = None, **kargs) -> plt.Figure: """ IMPROVE DOCSTRING Takes a dataframe and make multiple bar plots leveraging seaborn.barplot(y=y, x=x, data=data) Args: data: a dataframe with the data to plot x: names of (independent) variables in data ys: names of (dependent) variables in data n_col: number of columns panels in the figure figsize: size of the output figure x_labels: label for the x-axis y_labels: labels for the y-axis x_labels_rotation: rotation in degree of the x_labels (default 90) titles: titles for each panel y_lims: set limits for the y coordinate for all the panels kargs: any argument passed to seaborn.barplot such as hue, Returns: fig: Figure with XXX panels """ n_max = len(ys) n_col = min(n_col, n_max) n_row = int(numpy.ceil(float(n_max) / n_col)) figsize = (4 * n_col, 4 * n_row) if figsize is None else figsize fig, axes = plt.subplots(ncols=n_col, nrows=n_row, figsize=figsize) if titles: assert len(titles) == n_max if y_labels: assert len(y_labels) == n_max for n, y in enumerate(ys): if n_col == 1 and n_row == 1: ax_curr = axes elif n_row == 1: ax_curr = axes[n] else: c = n % n_col r = n // n_col ax_curr = axes[r, c] _ = seaborn.barplot(y=y, x=x, data=data, ax=ax_curr, **kargs) # y_lims if y_lims: ax_curr.set_ylim(y_lims[0], y_lims[1]) # x_labels : x_labels_raw = ax_curr.get_xticklabels() if x_labels: assert len(x_labels) == len(x_labels_raw) else: x_labels = x_labels_raw ax_curr.set_xticklabels(labels=x_labels, rotation=x_labels_rotation) # titles title = ax_curr.get_ylabel() if titles is None else titles[n] ax_curr.set_title(title) ax_curr.set_xlabel(None) # y_labels if y_labels: ax_curr.set_ylabel(y_labels[n]) else: ax_curr.set_ylabel(None) fig.tight_layout() plt.close(fig) return fig
def show_corr_matrix(data: torch.Tensor, show_colorbar: bool = True, sup_title: str = None): data = data.detach().cpu().clone() mask = torch.eye(data.shape[0]).bool() fig, axes = plt.subplots(ncols=2, nrows=2, figsize=(12, 8)) _ = seaborn.heatmap( data=data, square=True, xticklabels=False, yticklabels=False, center=0.0, robust=False, # so that I can see the full scale of diagonal and off-diagonal cbar=show_colorbar, ax=axes[0, 0]) _ = axes[0, 0].set_title("Diagonal and off-diagonal") diagonal = data[mask].flatten().numpy() _ = seaborn.histplot(x=diagonal, kde=True, bins=100, ax=axes[1, 0]) _ = axes[1, 0].set_title("Histogram of the diagonal element") data_overwritten = data.clone() data_overwritten[mask] = 0.0 seaborn.heatmap(data=data_overwritten, square=True, xticklabels=False, yticklabels=False, center=0.0, robust=True, cbar=show_colorbar, ax=axes[0, 1]) _ = axes[0, 1].set_title("Off-diagonal only") off_diagonal = data[~mask].flatten().numpy() _ = seaborn.histplot(x=off_diagonal, kde=True, bins=100, ax=axes[1, 1]) _ = axes[1, 1].set_title("Histogram of the off-diagonal element") if sup_title: _ = fig.suptitle(sup_title) plt.close(fig) return fig