Answer to: How to visualize a dense, gappy time series with spikes in Python?
Score: 2 • Accepted
You could make a broken axis using matplotlib. It's not exactly straightforward, especially when you need to break the axis multiple times. I also added a multicolored line plot (see this example) to highlight the thresholds.
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
import itertools as itt
np.random.seed(42)

# Simulate a gappy sensor time series: 11 recording segments, each lasting
# 2000-3000 s and sampled roughly every 0.1 s, separated by 30-40 min gaps
# (sensor offline periodically).
segments = []
t_start = 0
for _ in range(11):
    duration = np.random.uniform(2000, 3000)
    n_points = int(duration / 0.1)
    t = np.linspace(t_start, t_start + duration, n_points)

    # Slowly varying baseline, a zero-mean random walk, and Poisson
    # fluctuations around the baseline.
    base = 70 + 20 * np.sin(2 * np.pi * t / 5000)
    noise = np.cumsum(np.random.randn(n_points)) * 0.3
    noise -= np.mean(noise)
    rate = base + noise + np.random.poisson(base) - base

    # Superimpose a random number of Gaussian-shaped spikes.
    n_spikes = np.random.poisson(3)
    for _ in range(n_spikes):
        spike_pos = np.random.randint(0, n_points)
        spike_amp = np.random.exponential(100)
        spike_width = np.random.uniform(5, 50)
        spike = spike_amp * np.exp(-0.5 * ((t - t[spike_pos]) / spike_width) ** 2)
        rate += spike

    # Clip once per segment, outside the spike loop, so negative values are
    # removed even when no spikes were drawn.
    rate = np.maximum(rate, 0)
    segments.append((t / 3600, rate))  # store in hours

    # Advance past this segment and the offline gap that follows it.
    gap = np.random.uniform(1800, 2400)
    t_start += duration + gap
# Color-coding thresholds: the median of the strictly positive rates across
# all segments, and twice that median.
all_rates = np.concatenate([rate_values for _, rate_values in segments])
positive_rates = all_rates[all_rates > 0]
med = np.median(positive_rates)
thresh1, thresh2 = med, med * 2

# Short summary of the simulated data set
print(f"Total points: {len(all_rates)}")
print(f"Segments: {len(segments)}")
print(f"Median: {med:.1f}, 2x median: {thresh2:.1f}, Max: {np.max(all_rates):.1f}")
def color_for_rate(r, t1=thresh1, t2=thresh2):
    """Return a list of hex color strings, one per entry of r.

    Entries below t1 map to '#2166ac', entries from t1 up to (but not
    including) t2 map to '#ff8c00', and everything else maps to '#cc0000'.
    """
    upper_bands = np.where(r < t2, '#ff8c00', '#cc0000')
    banded = np.where(r < t1, '#2166ac', upper_bands)
    return list(banded.astype(str))
def bin_segment(t, r, bin_width_hrs=100/3600):
    """
    Bin a single continuous segment into fixed-width time bins.

    Bins start at the first time stamp and are bin_width_hrs wide. The final
    bin ends at the last time stamp, so it may be narrower than the others,
    and it includes that last sample. Bins that contain no samples are
    skipped.

    Parameters
    ----------
    t, r : array-like
        Time stamps and the values measured at those times. The first and
        last entries of t are used as the start and end of the segment.
    bin_width_hrs : float
        Width of each bin, in the same units as t.

    Returns
    -------
    tuple of numpy.ndarray
        Per-bin mean time, mean value, and standard error of the mean value
        (standard deviation divided by the square root of the sample count).
        All three are empty when fewer than two samples are given or when
        the segment has no positive time span.
    """
    t = np.asarray(t)
    r = np.asarray(r)
    if len(t) < 2:
        return np.array([]), np.array([]), np.array([])

    t_min, t_max = t[0], t[-1]
    edges = np.arange(t_min, t_max, bin_width_hrs)
    if edges.size == 0:
        return np.array([]), np.array([]), np.array([])

    # np.arange excludes the stop value, so the regular edges alone would
    # leave every sample after the last edge unbinned. Close the final bin at
    # t_max instead; if floating-point rounding pushed the last regular edge
    # up to or past t_max, snap it back rather than adding another edge.
    if edges[-1] < t_max:
        edges = np.append(edges, t_max)
    else:
        edges[-1] = t_max

    t_bins, r_bins, e_bins = [], [], []
    last = len(edges) - 2
    for j, (lo, hi) in enumerate(zip(edges[:-1], edges[1:])):
        # Bins are half-open [lo, hi), except the last one, which is closed
        # so that the sample at t_max is kept.
        below_hi = (t <= hi) if j == last else (t < hi)
        mask = (t >= lo) & below_hi
        count = np.count_nonzero(mask)
        if count == 0:
            continue
        values = r[mask]
        t_bins.append(np.mean(t[mask]))
        r_bins.append(np.mean(values))
        e_bins.append(np.std(values) / np.sqrt(count))
    return np.array(t_bins), np.array(r_bins), np.array(e_bins)
def colored_line(x, y, c, ax, **lc_kwargs):
    """
    Plot a line whose color is specified along the line by a third value.

    It does this by creating a collection of line segments. Each line segment
    is made up of two straight lines, each connecting the current (x, y) point
    to the midpoint of the line joining it to one of its two neighbors. This
    creates a continuous line with no gaps between the line segments.

    Parameters
    ----------
    x, y : array-like
        The horizontal and vertical coordinates of the data points.
    c : array-like
        One color per data point, the same size as x and y. It is passed to
        the LineCollection constructor as its ``colors`` argument.
    ax : Axes
        Axis object on which to plot the colored line.
    **lc_kwargs
        Any additional arguments to pass to the
        matplotlib.collections.LineCollection constructor. This should not
        include the ``array`` keyword argument, because the coloring comes
        from c; if provided, a warning is issued and it is dropped.

    Returns
    -------
    matplotlib.collections.LineCollection
        The generated line collection representing the colored line.
    """
    # Imported locally: the function used `warnings` without the module
    # being imported at file level, which raised NameError on this path.
    import warnings

    if "array" in lc_kwargs:
        warnings.warn('The provided "array" keyword argument will be overridden')
        # Actually discard it, so the colors in c are what gets drawn.
        lc_kwargs = {key: val for key, val in lc_kwargs.items() if key != "array"}

    # Default the capstyle to butt so that the line segments smoothly line up
    default_kwargs = {"capstyle": "butt"}
    default_kwargs.update(lc_kwargs)

    # Compute the midpoints of the line segments. Include the first and last
    # points twice so we don't need any special syntax later to handle them.
    x = np.asarray(x)
    y = np.asarray(y)
    x_midpts = np.hstack((x[0], 0.5 * (x[1:] + x[:-1]), x[-1]))
    y_midpts = np.hstack((y[0], 0.5 * (y[1:] + y[:-1]), y[-1]))

    # Determine the start, middle, and end coordinate pair of each line
    # segment. The extra axis puts each pair of points in its own list, so
    # concatenating along it yields:
    # [
    #   [(x1_start, y1_start), (x1_mid, y1_mid), (x1_end, y1_end)],
    #   [(x2_start, y2_start), (x2_mid, y2_mid), (x2_end, y2_end)],
    #   ...
    # ]
    coord_start = np.column_stack((x_midpts[:-1], y_midpts[:-1]))[:, np.newaxis, :]
    coord_mid = np.column_stack((x, y))[:, np.newaxis, :]
    coord_end = np.column_stack((x_midpts[1:], y_midpts[1:]))[:, np.newaxis, :]
    segments = np.concatenate((coord_start, coord_mid, coord_end), axis=1)

    lc = LineCollection(segments, colors=c, **default_kwargs)
    return ax.add_collection(lc)
# =====================================================================
# Broken-axis figure: one panel per recording segment
# =====================================================================
fig, axes = plt.subplots(1, len(segments), sharey=True, figsize=(20, 3))
fig.subplots_adjust(wspace=0.1)

for panel, (t_hrs, rate) in zip(axes.flat, segments):
    # Raw data, colored by threshold band
    colored_line(t_hrs, rate, color_for_rate(rate), panel)

    # Set the axis limits explicitly to the extent of this segment
    t_first, t_last = t_hrs[0], t_hrs[-1]
    panel.set_xlim(t_first, t_last)
    panel.set_ylim(bottom=0, top=500)

    # Keep only the ticks that are not too close to one of the breaks
    panel.set_xticks([
        tick for tick in panel.get_xticks()
        if t_first + 0.05 < tick < t_last - 0.05
    ])

    # Dashed guide lines at the two thresholds
    panel.axhline(y=thresh1, color='#ff8c00', linestyle='--', lw=0.5)
    panel.axhline(y=thresh2, color='#cc0000', linestyle='--', lw=0.5)

# Every pair of horizontally adjacent panels shares one axis break
neighbours = list(itt.pairwise(axes.flat))

# Hide the spines and ticks that face a break
for left, right in neighbours:
    left.spines.right.set_visible(False)
    right.spines.left.set_visible(False)
    left.tick_params(right=False)
    right.tick_params(left=False)
axes[0].yaxis.tick_left()
axes[-1].yaxis.tick_right()

# Draw the slanted break marks at the bottom and top of each break
slant = 2  # proportion of vertical to horizontal extent of the slanted line
break_style = dict(
    marker=[(-1, -slant), (1, slant)],
    markersize=12,
    linestyle="none", color='k', mec='k', mew=1, clip_on=False
)
for left, right in neighbours:
    left.plot([1, 1], [0, 1], transform=left.transAxes, **break_style)
    right.plot([0, 0], [0, 1], transform=right.transAxes, **break_style)

fig.savefig('example.png')
Output:
EDIT: I looked into binning, but I don't think it's a good idea for your use case, since it might erase small peaks. I'd say the raw data is probably best.
View Question ↗
Question
Parent Entity
Score: 3 • Views: 59
Site: stackoverflow
Other Comments / Reviews
SaaS Metrics