Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 86 additions & 31 deletions fastplot/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,12 @@
import numpy as np
import pandas as pd
import re
from packaging import version
from statsmodels.distributions.empirical_distribution import ECDF

MPL_VERSION = mpl.__version__
HAS_ECDF_PLOT = version.parse(MPL_VERSION) >= version.parse('3.8.0') # 3.8.0 was released in September 2023

# Register Pandas Converters
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
Expand Down Expand Up @@ -47,7 +51,7 @@ def plot(data, path, mode = 'line',
linewidth = 1, boxplot_whis=[5,95], timeseries_format='%Y/%m/%d', bars_width=0.6,
boxplot_numerousness = False, boxplot_numerousness_fontsize = 'x-small', boxplot_fliersize=0,
boxplot_palette=sns.color_palette(), boxplot_empty=False, boxplot_numerousness_rotate=None,
callback = None, timeseries_stacked_right_legend_order=True, CDF_complementary=False, vlines=None, hlines=None, vlines_style={}, hlines_style={}):
callback = None, timeseries_stacked_right_legend_order=True, CDF_complementary=False, vlines=None, hlines=None, vlines_style={}, hlines_style={}, stats=None):

# 1. Create and configure plot visual style
plt.rcParams.update(plt.rcParamsDefault)
Expand Down Expand Up @@ -96,33 +100,12 @@ def plot(data, path, mode = 'line',
plt.plot(data[0], data[1], markeredgewidth=0, linewidth = linewidth, **plot_args)

elif mode == 'CDF':
s = data
e = ECDF(s)
if xscale == 'log':
x = np.logspace(np.log10(min(s)), np.log10(max(s)), NUM_BIN_CDF )
if CDF_complementary:
y = 1-e(x)
else:
y = e(x)
if HAS_ECDF_PLOT:
# Use plt.ecdf for matplotlib 3.8+
plt.ecdf(data, complementary=CDF_complementary, linewidth=linewidth, **plot_args)
else:
x = np.linspace(min(s), max(s), NUM_BIN_CDF )
if CDF_complementary:
y = 1-e(x)
x = np.concatenate( (np.array([min(s)]), x) )
y = np.concatenate( (np.array([1]), y) )
else:
y = e(x)
x = np.concatenate( (np.array([min(s)]), x) )
y = np.concatenate( (np.array([0]), y) )

plt.plot(x,y, linewidth = linewidth, **plot_args)
if ylabel is None:
ylabel = 'CCDF' if CDF_complementary else "CDF"
if ylim is None:
ylim = (0,1)

elif mode == 'CDF_multi':
for s_name, s in data :
# Fallback to statsmodels for older matplotlib versions
s = data
e = ECDF(s)
if xscale == 'log':
x = np.logspace(np.log10(min(s)), np.log10(max(s)), NUM_BIN_CDF )
Expand All @@ -132,7 +115,6 @@ def plot(data, path, mode = 'line',
y = e(x)
else:
x = np.linspace(min(s), max(s), NUM_BIN_CDF )

if CDF_complementary:
y = 1-e(x)
x = np.concatenate( (np.array([min(s)]), x) )
Expand All @@ -142,13 +124,57 @@ def plot(data, path, mode = 'line',
x = np.concatenate( (np.array([min(s)]), x) )
y = np.concatenate( (np.array([0]), y) )

plt.plot(x,y, label=s_name, linewidth = linewidth, **plot_args)
plt.plot(x,y, linewidth = linewidth, **plot_args)
if ylabel is None:
ylabel = 'CCDF' if CDF_complementary else "CDF"
if ylim is None:
ylim = (0,1)

if isinstance(stats, dict):
clear_dict(stats)
for k, v in get_distribution_stats(data).items():
stats[k] = v

elif mode == 'CDF_multi':
for s_name, s in data:
if HAS_ECDF_PLOT:
# Use plt.ecdf for matplotlib 3.8+
plt.ecdf(s, label=s_name, complementary=CDF_complementary, linewidth=linewidth, **plot_args)
else:
# Fallback for older matplotlib versions
e = ECDF(s)
if xscale == 'log':
x = np.logspace(np.log10(min(s)), np.log10(max(s)), NUM_BIN_CDF )
if CDF_complementary:
y = 1-e(x)
else:
y = e(x)
else:
x = np.linspace(min(s), max(s), NUM_BIN_CDF )

if CDF_complementary:
y = 1-e(x)
x = np.concatenate( (np.array([min(s)]), x) )
y = np.concatenate( (np.array([1]), y) )
else:
y = e(x)
x = np.concatenate( (np.array([min(s)]), x) )
y = np.concatenate( (np.array([0]), y) )

plt.plot(x,y, label=s_name, linewidth = linewidth, **plot_args)

if ylabel is None:
ylabel = 'CCDF' if CDF_complementary else "CDF"
if ylim is None:
ylim = (0,1)

if isinstance(stats, dict):
clear_dict(stats)
for name, samples in data:
stats[name] = {}
for k, v in get_distribution_stats(samples).items():
stats[name][k] = v

elif mode == 'boxplot':
labels = [e[0] for e in data]
samples = [e[1] for e in data]
Expand Down Expand Up @@ -177,6 +203,13 @@ def plot(data, path, mode = 'line',
plt.setp(plt.gca().patches, edgecolor = 'black', facecolor='white', linewidth =1)
plt.setp(plt.gca().lines, color='black', linewidth =1)

if isinstance(stats, dict):
clear_dict(stats)
for name, samples in data:
stats[name] = {}
for k, v in get_distribution_stats(samples).items():
stats[name][k] = v

elif mode == 'boxplot_multi':
new_data = []
for c in data:
Expand All @@ -189,6 +222,13 @@ def plot(data, path, mode = 'line',
p.legend().remove()
plt.xlabel("")
plt.gca().set_xticklabels(data.index)

if isinstance(stats, dict):
clear_dict(stats)
for c in data:
stats[c] = {}
for index, values in data[c].items():
stats[c][index] = get_distribution_stats(values)

elif mode == 'timeseries':
plt.plot(data, markeredgewidth=0, linewidth = linewidth, **plot_args)
Expand Down Expand Up @@ -340,6 +380,22 @@ def tex_escape(text):
regex = re.compile('|'.join(re.escape(str(key)) for key in sorted(conv.keys(), key = lambda item: - len(item))))
return regex.sub(lambda match: conv[match.group()], text)


def get_distribution_stats(data):
    """Return summary statistics describing the distribution of *data*.

    Args:
        data: A collection of numeric samples accepted by the NumPy
            reduction functions (e.g. a list, NumPy array or pandas Series).

    Returns:
        A dict with the keys ``'min'``, ``'q1'``, ``'median'``, ``'q3'``,
        ``'max'``, ``'mean'`` and ``'std'`` (in that order). ``'q1'`` and
        ``'q3'`` are the 25th and 75th percentiles; ``'std'`` is whatever
        ``np.std`` returns with its default arguments. If *data* contains
        no samples, every value is ``nan``.
    """
    # np.min / np.max raise ValueError on an empty sample; report NaN for
    # every statistic instead so that collecting stats never aborts a plot
    # that contains an empty group.
    if np.size(data) == 0:
        return {
            key: np.nan
            for key in ('min', 'q1', 'median', 'q3', 'max', 'mean', 'std')
        }

    return {
        'min': np.min(data),
        'q1': np.percentile(data, 25),
        'median': np.median(data),
        'q3': np.percentile(data, 75),
        'max': np.max(data),
        'mean': np.mean(data),
        'std': np.std(data),
    }

def clear_dict(d):
    """Remove every entry from the dict *d* in place.

    The same dict object is emptied (not replaced), so any other reference
    to it observes the cleared state. Returns ``None``.
    """
    # dict.clear() empties the mapping in place without copying the key list.
    d.clear()

def gini(arr):
count = arr.size
coefficient = 2 / count
Expand All @@ -365,5 +421,4 @@ def lorenz_gini_multi(data, name_format="{} (GI={:0.2f})"):
name_new = name_format.format(name, gini_index)
data_new.append( (name_new, (lorenz_x,lorenz_y) ) )
return data_new



1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ pandas
statsmodels
scipy
seaborn
packaging
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
url="https://github.com/marty90/fastplot",
download_url = 'https://github.com/marty90/fastplot/tarball/1.5.0',
packages=['fastplot'],
install_requires=['matplotlib', 'numpy', 'pandas', 'statsmodels', 'scipy', 'seaborn']
install_requires=['matplotlib', 'numpy', 'pandas', 'statsmodels', 'scipy', 'seaborn', 'packaging']
)

# Upload on pip with:
Expand Down