Gallery

The gallery shows how the themes and colors look when applied to different kinds of plots. It can also serve as inspiration for which graphics might be suitable for your use case. Note that this template is not meant as a one-to-one guideline, but rather as a tool that helps statworx create a more coherent picture when visualizing data.

Apply Style

To apply the statworx theme to a matplotlib plot, simply run the apply_style function as shown below. All subsequently generated plots will have the statworx colors.

# mkdocs: render
# Activate the statworx theme before creating any figure.
import statworx_theme

statworx_theme.apply_style()

Distribution

Density Chart | Stacked Distribution Chart | Violin Plot | Histogram

# mkdocs: render
import seaborn as sns

# Tipping records from the seaborn example datasets
tips_df = sns.load_dataset("tips")

# One filled density curve of the bill amount per meal time; with
# common_norm=False every curve is normalised on its own.
sns.kdeplot(
    x="total_bill",
    hue="time",
    data=tips_df,
    common_norm=False,
    fill=True,
)

# mkdocs: render
import seaborn as sns

diamonds_df = sns.load_dataset("diamonds")

# Filled kernel density of carat, split by cut: the bands show how the
# cut categories are distributed at each carat value.
kde_options = {
    "kind": "kde",
    "multiple": "fill",
    "clip": (0, None),
    "palette": "stwx:Bl_fade",
}
sns.displot(data=diamonds_df, x="carat", hue="cut", **kde_options)

# mkdocs: render
import seaborn as sns

tips_df = sns.load_dataset("tips")

# Split violins: one half per smoker group for each day, with quartile
# lines drawn inside the violins.
violin_options = dict(split=True, inner="quart")
sns.violinplot(
    x="day",
    y="total_bill",
    hue="smoker",
    data=tips_df,
    **violin_options,
)

# Drop the left spine in addition to the ones despine removes by default
sns.despine(left=True)

Hint

When making a histogram, always explore multiple bin widths. In addition, avoid overlapping histograms as they can be confused with stacked bar charts.

# mkdocs: render
import seaborn as sns

diamonds_df = sns.load_dataset("diamonds")

# Price histogram on a logarithmic axis with the cut categories stacked
sns.histplot(
    data=diamonds_df,
    x="price",
    hue="cut",
    log_scale=True,
    multiple="stack",
)

Timeseries

Line Plot | Interval Line Plot

# mkdocs: render
import numpy as np
import pandas as pd
import seaborn as sns

N_DAYS = 365
SERIES_NAMES = ["A", "B", "C", "D"]

# Four seeded random walks with one observation per day
rng = np.random.RandomState(365)
walks = rng.randn(N_DAYS, len(SERIES_NAMES)).cumsum(axis=0)
day_index = pd.date_range("1 1 2016", periods=N_DAYS, freq="D")

# Smooth every series with a 7-row rolling mean before plotting
smoothed = (
    pd.DataFrame(walks, index=day_index, columns=SERIES_NAMES)
    .rolling(7)
    .mean()
)

sns.lineplot(data=smoothed, linewidth=2.5)

# mkdocs: render
import seaborn as sns

# Long-form example data
fmri_df = sns.load_dataset("fmri")

# Signal over time; colour encodes the region, line style the event
sns.lineplot(
    data=fmri_df,
    x="timepoint",
    y="signal",
    hue="region",
    style="event",
)

Relationship

Regression Plot | Bubble Chart | Joint Density | Scatter Plot | Pair Plot | Correlation Plot

# mkdocs: render
import matplotlib.pyplot as plt
import seaborn as sns

# Load the penguins dataset
penguins = sns.load_dataset("penguins")

# Plot bill depth as a function of bill length, with one regression fit per species
g = sns.lmplot(
    data=penguins, x="bill_length_mm", y="bill_depth_mm", hue="species", height=5
)

# Use more informative axis labels than are provided by default
g.set_axis_labels("Snoot length (mm)", "Snoot depth (mm)")

# pyplot is imported above so that this snippet also runs on its own
plt.title("Lorem Ipsum Dolor Sit Amet")

# mkdocs: render
import seaborn as sns

mpg_df = sns.load_dataset("mpg")

# Bubble chart of mpg against horsepower: colour encodes the origin,
# marker size the weight.
sns.relplot(
    data=mpg_df,
    x="horsepower",
    y="mpg",
    hue="origin",
    size="weight",
    sizes=(40, 400),
    alpha=0.5,
    palette="stwx:alternative",
    height=6,
)

# mkdocs: render
import seaborn as sns

penguins_df = sns.load_dataset("penguins")

# Joint and marginal kernel density estimates of the two bill
# measurements, drawn separately for every species.
joint_grid = sns.jointplot(
    kind="kde",
    data=penguins_df,
    x="bill_length_mm",
    y="bill_depth_mm",
    hue="species",
)

# mkdocs: render
import seaborn as sns

mpg_df = sns.load_dataset("mpg")

# Build the grid by hand: scatter plot in the centre, rug plots on the margins
grid = sns.JointGrid(x="mpg", y="acceleration", data=mpg_df, ratio=17, space=0)
grid.plot_joint(sns.scatterplot, legend=False)
grid.plot_marginals(sns.rugplot, height=1)

# mkdocs: render

import seaborn as sns

penguins_df = sns.load_dataset("penguins")

# Pairwise relationships between the columns, coloured by species
sns.pairplot(data=penguins_df, hue="species")

# mkdocs: render

from string import ascii_letters
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

N_ROWS, N_COLS = 100, 10

# Seeded normal sample; the columns are named after the first ten upper-case
# letters (ascii_letters lists the lower-case ones first, hence the offset).
rng = np.random.RandomState(33)
column_names = list(ascii_letters[26:26 + N_COLS])
sample = pd.DataFrame(data=rng.normal(size=(N_ROWS, N_COLS)), columns=column_names)

# Pairwise correlations between the columns
corr_matrix = sample.corr()

# Boolean mask that hides the upper triangle, diagonal included
upper_mask = np.triu(np.ones_like(corr_matrix, dtype=bool))

# Square cells, thin separators and a shortened colour bar
sns.heatmap(
    corr_matrix,
    mask=upper_mask,
    cmap="stwx:Bl_fade",
    vmax=0.3,
    center=0,
    square=True,
    linewidths=0.5,
    cbar_kws={"shrink": 0.5},
)

Comparison

Bar Plot | Stacked Bar Plot | Boxplot | Boxen Plot | Swarm Plot

Hint

One should only rearrange bars when there is no natural ordering to the categories. Whenever there is a natural ordering (i.e. when the categorical variable is an ordered factor), one should keep the original ordering in the visualization. When bars are of similar length, bar plots become visually less appealing (“Moiré effect”). In this case, one can resort to lollipop charts instead.

# mkdocs: render

import seaborn as sns

penguins_df = sns.load_dataset("penguins")

# Grouped bars of body mass per species, one bar per sex, without error bars.
# NOTE(review): newer seaborn releases replace `ci` with `errorbar` --
# confirm against the pinned seaborn version before switching.
bar_grid = sns.catplot(
    kind="bar",
    data=penguins_df,
    x="species",
    y="body_mass_g",
    hue="sex",
    ci=None,
)

# Cosmetics: no left spine, no x label, no legend title
bar_grid.despine(left=True)
bar_grid.set_axis_labels("", "Body mass (g)")
bar_grid.legend.set_title("")

# mkdocs: render

import seaborn as sns
import matplotlib.pyplot as plt
from statworx_theme.colors import BLUE, LIGHT_BLUE

# Tall, narrow canvas (6 x 15) for the horizontal bars
fig, ax = plt.subplots(figsize=(6, 15))

# Car crash example data, largest totals first
crashes_df = sns.load_dataset("car_crashes").sort_values("total", ascending=False)

# Background bars: all collisions.
# NOTE(review): set_color_codes remaps matplotlib's single-letter colour
# shorthands; with explicit theme colours it presumably does not change
# these bars -- confirm before removing.
sns.set_color_codes("pastel")
sns.barplot(
    data=crashes_df,
    x="total",
    y="abbrev",
    color=LIGHT_BLUE,
    label="Total",
)

# Foreground bars on the same axes: collisions where alcohol was involved
sns.set_color_codes("muted")
sns.barplot(
    data=crashes_df,
    x="alcohol",
    y="abbrev",
    color=BLUE,
    label="Alcohol-involved",
)

# Axis limits and labels, framed two-column legend, no left/bottom spines
ax.set(
    xlim=(0, 24),
    ylabel="",
    xlabel="Automobile collisions per billion miles",
)
ax.legend(ncol=2, loc="lower right", frameon=True)
sns.despine(left=True, bottom=True)

Hint

Adding jittered points to a box plot is useful to see the underlying distribution of the data.

# mkdocs: render

import seaborn as sns

tips_df = sns.load_dataset("tips")

# Bill amount per day, with one box per smoker group inside every day
sns.boxplot(
    data=tips_df,
    x="day",
    y="total_bill",
    hue="smoker",
)

# Move the spines outwards and trim them to the data range
sns.despine(trim=True, offset=10)

# mkdocs: render
import seaborn as sns

# Import the theme colour here so that the snippet also runs on its own
from statworx_theme.colors import BLUE

diamonds = sns.load_dataset("diamonds")

# Explicit order of the clarity categories along the x axis
clarity_ranking = ["I1", "SI2", "SI1", "VS2", "VS1", "VVS2", "VVS1", "IF"]

# Letter-value ("boxen") plot of carat per clarity grade in a single theme colour
sns.boxenplot(x="clarity", y="carat",
              color=BLUE, order=clarity_ranking,
              scale="linear", data=diamonds)

# mkdocs: render
import seaborn as sns

penguins_df = sns.load_dataset("penguins")

# One marker per penguin: body mass on the x axis, one row per sex,
# coloured by species.
swarm_ax = sns.swarmplot(
    x="body_mass_g",
    y="sex",
    hue="species",
    data=penguins_df,
)

# Remove the y axis label
swarm_ax.set(ylabel="")