from statsforecast import StatsForecast
from statsforecast.models import SeasonalNaive, Naive, HistoricAverage
import pandas as pd
import matplotlib.pyplot as plt
import random
from itertools import product
import matplotlib.dates as mdatesfrom utilsforecast.data import generate_series
Y_df = generate_series(n_series=128, freq='ME', min_length=60, max_length=60, with_trend=True)
Y_df| unique_id | ds | y | |
|---|---|---|---|
| 0 | 0 | 2000-01-31 | 0.274407 |
| 1 | 0 | 2000-02-29 | 2.227602 |
| 2 | 0 | 2000-03-31 | 4.041396 |
| 3 | 0 | 2000-04-30 | 5.882464 |
| 4 | 0 | 2000-05-31 | 7.691857 |
| ... | ... | ... | ... |
| 7675 | 127 | 2004-08-31 | 44.923498 |
| 7676 | 127 | 2004-09-30 | 46.361813 |
| 7677 | 127 | 2004-10-31 | 48.138033 |
| 7678 | 127 | 2004-11-30 | 49.535287 |
| 7679 | 127 | 2004-12-31 | 51.424035 |
7680 rows × 3 columns
sf = StatsForecast(
models=[SeasonalNaive(season_length=12), Naive(), HistoricAverage()],
freq='ME',
n_jobs=-1
)
cv_df = sf.cross_validation(df=Y_df, h=12)
cv_df| unique_id | ds | cutoff | y | SeasonalNaive | Naive | HistoricAverage | |
|---|---|---|---|---|---|---|---|
| 0 | 0 | 2004-01-31 | 2003-12-31 | 41.918068 | 31.626313 | 51.954809 | 26.218289 |
| 1 | 0 | 2004-02-29 | 2003-12-31 | 43.812216 | 33.498739 | 51.954809 | 26.218289 |
| 2 | 0 | 2004-03-31 | 2003-12-31 | 45.785467 | 35.532154 | 51.954809 | 26.218289 |
| 3 | 0 | 2004-04-30 | 2003-12-31 | 47.589676 | 37.271197 | 51.954809 | 26.218289 |
| 4 | 0 | 2004-05-31 | 2003-12-31 | 49.734570 | 38.980048 | 51.954809 | 26.218289 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 1531 | 127 | 2004-08-31 | 2003-12-31 | 44.923498 | 36.354514 | 43.117257 | 21.773173 |
| 1532 | 127 | 2004-09-30 | 2003-12-31 | 46.361813 | 38.185409 | 43.117257 | 21.773173 |
| 1533 | 127 | 2004-10-31 | 2003-12-31 | 48.138033 | 40.096793 | 43.117257 | 21.773173 |
| 1534 | 127 | 2004-11-30 | 2003-12-31 | 49.535287 | 41.398671 | 43.117257 | 21.773173 |
| 1535 | 127 | 2004-12-31 | 2003-12-31 | 51.424035 | 43.117257 | 43.117257 | 21.773173 |
1536 rows × 7 columns
cutoff = pd.Timestamp('2003-12-31')from utilsforecast.evaluation import evaluate
from utilsforecast.losses import mae, rmse
df_eval = evaluate(cv_df, metrics=[mae, rmse], models=['SeasonalNaive', 'Naive', 'HistoricAverage'])
df_eval| unique_id | cutoff | metric | SeasonalNaive | Naive | HistoricAverage | |
|---|---|---|---|---|---|---|
| 0 | 0 | 2003-12-31 | mae | 10.385087 | 5.577062 | 26.025675 |
| 1 | 1 | 2003-12-31 | mae | 7.175618 | 4.931577 | 17.871735 |
| 2 | 2 | 2003-12-31 | mae | 4.414868 | 4.718237 | 11.174016 |
| 3 | 3 | 2003-12-31 | mae | 0.999545 | 5.029895 | 3.747323 |
| 4 | 4 | 2003-12-31 | mae | 5.954271 | 4.808348 | 14.950838 |
| ... | ... | ... | ... | ... | ... | ... |
| 251 | 123 | 2003-12-31 | rmse | 8.823970 | 5.917266 | 22.595332 |
| 252 | 124 | 2003-12-31 | rmse | 3.841348 | 5.698673 | 10.594876 |
| 253 | 125 | 2003-12-31 | rmse | 4.699834 | 5.533386 | 12.615176 |
| 254 | 126 | 2003-12-31 | rmse | 10.197017 | 6.425962 | 26.298619 |
| 255 | 127 | 2003-12-31 | rmse | 8.199264 | 5.921180 | 21.203321 |
256 rows × 6 columns
def plot_grid(df_train, df_test=None, df_eval=None, plot_random=True, model=None, level=None, ids=None, descs=None, date_fmt=None):
if model is None and df_test is not None:
models = [c for c in df_test.columns if c not in ('unique_id', 'ds', 'y', 'y_test', 'cutoff')]
assert len(models) == 1, f"Multiple models found: {models}. Please specify `model`."
model = models[0]
fig, axes = plt.subplots(4, 2, figsize = (24, 16))
unique_ids = df_train['unique_id'].unique()
assert len(unique_ids) >= 8, "Must provide at least 8 ts"
if plot_random:
unique_ids = random.sample(list(unique_ids), k=8)
else:
unique_ids = ids
for uid, (idx, idy) in zip(unique_ids, product(range(4), range(2))):
train_uid = df_train.query('unique_id == @uid')
line, = axes[idx, idy].plot(train_uid['ds'], train_uid['y'], label='y', )
train_color = line.get_color()
if df_test is not None:
test_uid = df_test.query('unique_id == @uid')
axes[idx, idy].axvline(x=test_uid['cutoff'].iloc[0], color='grey', linestyle='--', label='cutoff')
for col in ['y', f'{model}', 'y_test']:
if col in test_uid:
if col == 'y': axes[idx, idy].plot(test_uid['ds'], test_uid[col], color=train_color, label='_nolegend_')
else: axes[idx, idy].plot(test_uid['ds'], test_uid[col], label=col)
if level is not None:
for l, alpha in zip(sorted(level), [0.5, .4, .35, .2]):
axes[idx, idy].fill_between(
test_uid['ds'],
test_uid[f'{model}-lo-{l}'],
test_uid[f'{model}-hi-{l}'],
alpha=alpha,
color='orange',
label=f'{model}_level_{l}',
)
# Build title — include MAE if eval data is available
title = f'UID: {uid}'
if descs is not None and uid in descs['unique_id'].values:
title = f"{descs.query('unique_id == @uid')['desc'].values[0]} | {title}"
if df_eval is not None and model is not None:
eval_uid = df_eval.query('unique_id == @uid')
if not eval_uid.empty and model in eval_uid.columns:
metrics = ' | '.join(f'{r.metric}: {r[model]:.2f}' for _, r in eval_uid.iterrows())
title += f' | {metrics}'
axes[idx, idy].set_title(title, fontweight='bold')
axes[idx, idy].set_xlabel('Date [m]')
axes[idx, idy].set_ylabel('Target')
axes[idx, idy].set_ylim(bottom=0)
axes[idx, idy].legend(loc='upper left')
if date_fmt in ('month', 'monthly'):
axes[idx, idy].xaxis.set_major_locator(mdates.MonthLocator(interval=3))
axes[idx, idy].xaxis.set_major_formatter(mdates.DateFormatter('%m/%Y'))
elif date_fmt == 'quarter':
axes[idx, idy].xaxis.set_major_locator(mdates.MonthLocator(bymonth=[1,4,7,10]))
axes[idx, idy].xaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f"{mdates.num2date(x):%Y} Q{(mdates.num2date(x).month-1)//3+1}"))
if date_fmt: plt.setp(axes[idx, idy].xaxis.get_majorticklabels(), rotation=45, ha='right')
axes[idx, idy].grid()
fig.subplots_adjust(hspace=0.5)
plt.show()
return Nonedescs = pd.DataFrame(dict(unique_id=[0, 3, 7], desc=['Widget Sales', 'Gizmo Revenue', 'Sprocket Orders']))plot_grid(Y_df, cv_df.query("cutoff == @cutoff"),
df_eval.query("cutoff == @cutoff"),
model='SeasonalNaive', descs=descs, date_fmt="quarter")
plot_grid(Y_df, cv_df.query("cutoff == @cutoff"),
df_eval.query("cutoff == @cutoff"),
model='SeasonalNaive', descs=descs, date_fmt="month")
def select_uids(df_eval, model, metric='mae', mode='random', k=8):
"Select k unique_ids: 'random', 'top' (best), or 'flop' (worst) by metric"
sub = df_eval.query("metric == @metric")
if mode == 'random': return random.sample(list(sub['unique_id'].unique()), k=k)
ascending = mode == 'top'
return sub.nsmallest(k, model)['unique_id'].tolist() if ascending else sub.nlargest(k, model)['unique_id'].tolist()uids = select_uids(df_eval.query("cutoff == @cutoff"), 'SeasonalNaive', metric='mae', mode='flop')
plot_grid(Y_df, cv_df.query("cutoff == @cutoff"),
df_eval.query("cutoff == @cutoff"),
model='SeasonalNaive', plot_random=False, ids=uids, date_fmt="month")
def plot_grid_compact(df_train, df_test=None, df_eval=None, model=None, level=None, ids=None, descs=None, date_fmt=None, ncols=4, figw=28, rowh=3):
if model is None and df_test is not None:
models = [c for c in df_test.columns if c not in ('unique_id', 'ds', 'y', 'y_test', 'cutoff')]
assert len(models) == 1, f"Multiple models found: {models}. Please specify `model`."
model = models[0]
if ids is None: ids = random.sample(list(df_train['unique_id'].unique()), k=min(16, len(df_train['unique_id'].unique())))
nrows = -(-len(ids) // ncols)
fig, axes = plt.subplots(nrows, ncols, figsize=(figw, rowh * nrows), sharex='col', squeeze=False)
handles, labels = [], []
for i, uid in enumerate(ids):
r, c = divmod(i, ncols)
ax = axes[r][c]
train_uid = df_train.query('unique_id == @uid')
line, = ax.plot(train_uid['ds'], train_uid['y'], label='y')
train_color = line.get_color()
if df_test is not None:
test_uid = df_test.query('unique_id == @uid')
ax.axvline(x=test_uid['cutoff'].iloc[0], color='grey', linestyle='--', label='cutoff')
for col in ['y', f'{model}', 'y_test']:
if col not in test_uid: continue
if col == 'y': ax.plot(test_uid['ds'], test_uid[col], color=train_color, label='_nolegend_')
else: ax.plot(test_uid['ds'], test_uid[col], label=col)
if level is not None:
for l, alpha in zip(sorted(level), [0.5, .4, .35, .2]):
ax.fill_between(test_uid['ds'], test_uid[f'{model}-lo-{l}'], test_uid[f'{model}-hi-{l}'], alpha=alpha, color='orange', label=f'{model}_level_{l}')
title = f'UID: {uid}'
if descs is not None and uid in descs['unique_id'].values:
title = f"{descs.query('unique_id == @uid')['desc'].values[0]} | {title}"
if df_eval is not None and model is not None:
eval_uid = df_eval.query('unique_id == @uid')
if not eval_uid.empty and model in eval_uid.columns:
metrics = ' | '.join(f'{r.metric}: {r[model]:.2f}' for _, r in eval_uid.iterrows())
title += f' | {metrics}'
ax.set_title(title, fontsize=9, fontweight='bold')
ax.set_ylim(bottom=0)
ax.grid(True, alpha=0.3)
if i == 0: handles, labels = ax.get_legend_handles_labels()
if date_fmt in ('month', 'monthly'):
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=3))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%m/%Y'))
elif date_fmt == 'quarter':
ax.xaxis.set_major_locator(mdates.MonthLocator(bymonth=[1,4,7,10]))
ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f"{mdates.num2date(x):%Y} Q{(mdates.num2date(x).month-1)//3+1}"))
if date_fmt and r == nrows - 1: plt.setp(ax.xaxis.get_majorticklabels(), rotation=45, ha='right')
for i in range(len(ids), nrows * ncols): axes[divmod(i, ncols)].set_visible(False)
fig.legend(handles, labels, loc='upper center', ncol=len(labels), bbox_to_anchor=(0.5, 1.02), fontsize=10)
fig.subplots_adjust(hspace=0.45)
plt.show()plot_grid_compact(Y_df, cv_df.query("cutoff == @cutoff"), df_eval.query("cutoff == @cutoff"), model='SeasonalNaive', descs=descs)