Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions neuralprophet/data/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def _reshape_raw_predictions_to_forecst_df(
forecast = predicted[:, forecast_lag - 1, j]
pad_before = max_lags + forecast_lag - 1
pad_after = n_forecasts - forecast_lag
yhat = np.pad(forecast, (pad_before, pad_after), mode="constant", constant_values=np.NaN)
yhat = np.pad(forecast, (pad_before, pad_after), mode="constant", constant_values=np.nan)
if prediction_frequency is not None:
ds = df_forecast["ds"].iloc[pad_before : -pad_after if pad_after > 0 else None]
mask = df_utils.create_mask_for_prediction_frequency(
Expand All @@ -79,7 +79,7 @@ def _reshape_raw_predictions_to_forecst_df(
)
yhat = np.full((len(ds),), np.nan)
yhat[mask] = forecast
yhat = np.pad(yhat, (pad_before, pad_after), mode="constant", constant_values=np.NaN)
yhat = np.pad(yhat, (pad_before, pad_after), mode="constant", constant_values=np.nan)
# 0 is the median quantile index
if j == 0:
name = f"yhat{forecast_lag}"
Expand All @@ -104,7 +104,7 @@ def _reshape_raw_predictions_to_forecst_df(
forecast = components[comp][:, forecast_lag - 1, j] # 0 is the median quantile
pad_before = max_lags + forecast_lag - 1
pad_after = n_forecasts - forecast_lag
yhat = np.pad(forecast, (pad_before, pad_after), mode="constant", constant_values=np.NaN)
yhat = np.pad(forecast, (pad_before, pad_after), mode="constant", constant_values=np.nan)
if prediction_frequency is not None:
ds = df_forecast["ds"].iloc[pad_before : -pad_after if pad_after > 0 else None]
mask = df_utils.create_mask_for_prediction_frequency(
Expand All @@ -114,7 +114,7 @@ def _reshape_raw_predictions_to_forecst_df(
)
yhat = np.full((len(ds),), np.nan)
yhat[mask] = forecast
yhat = np.pad(yhat, (pad_before, pad_after), mode="constant", constant_values=np.NaN)
yhat = np.pad(yhat, (pad_before, pad_after), mode="constant", constant_values=np.nan)
if j == 0: # temporary condition to add only the median component
name = f"{comp}{forecast_lag}"
df_forecast[name] = yhat
Expand All @@ -126,7 +126,7 @@ def _reshape_raw_predictions_to_forecst_df(
forecast_0 = components[comp][0, :, j]
forecast_rest = components[comp][1:, n_forecasts - 1, j]
yhat = np.pad(
np.concatenate((forecast_0, forecast_rest)), (max_lags, 0), mode="constant", constant_values=np.NaN
np.concatenate((forecast_0, forecast_rest)), (max_lags, 0), mode="constant", constant_values=np.nan
)
if prediction_frequency is not None:
date_list = []
Expand Down
4 changes: 2 additions & 2 deletions neuralprophet/data/split.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def _make_future_dataframe(
f"{nan_at_end + 1} missing values were detected at the end of df before df was extended into "
"the future. Please make sure there are no NaN values at the end of df."
)
df["y"].iloc[-(nan_at_end + 1) :].ffill(inplace=True)
df.loc[df.index[-(nan_at_end + 1) :], "y"] = df.loc[df.index[-(nan_at_end + 1) :], "y"].ffill()
log.warning(
f"{nan_at_end + 1} missing values were forward-filled at the end of df before df was extended into the "
"future. Please make sure there are no NaN values at the end of df."
Expand Down Expand Up @@ -266,7 +266,7 @@ def _make_future_dataframe(
regressors_df=regressors_df,
)
if len(df) > 0:
df = pd.concat([df, future_df])
df = pd.concat([df, future_df], ignore_index=True)
else:
df = future_df
df = df.reset_index(drop=True)
Expand Down
14 changes: 7 additions & 7 deletions neuralprophet/df_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ def check_dataframe(
if df["ds"].isnull().any():
raise ValueError("Found NaN in column ds.")
if not np.issubdtype(df["ds"].to_numpy().dtype, np.datetime64):
df["ds"] = pd.to_datetime(df.loc[:, "ds"], utc=True).dt.tz_convert(None)
df["ds"] = pd.to_datetime(df["ds"]).dt.tz_localize(None)
if df.groupby("ID").apply(lambda x: x.duplicated("ds").any()).any():
raise ValueError("Column ds has duplicate values. Please remove duplicates.")

Expand Down Expand Up @@ -1117,7 +1117,7 @@ def get_freq_dist(ds_col):
tuple
numeric delta values (``ms``) and distribution of frequency counts
"""
converted_ds = pd.to_datetime(ds_col, utc=True).view(dtype=np.int64)
converted_ds = pd.to_datetime(ds_col, utc=True).astype(np.int64)
diff_ds = np.unique(converted_ds.diff(), return_counts=True)
return diff_ds

Expand Down Expand Up @@ -1171,7 +1171,7 @@ def get_dist_considering_two_freqs(dist):

Note
----
Useful for the frequency exceptions (i.e. ``M``, ``Y``, ``Q``, ``B``, and ``BH``).
Useful for the frequency exceptions (i.e. ``ME``, ``YE``, ``QE``, ``B``, and ``bh``).

Parameters
----------
Expand Down Expand Up @@ -1252,18 +1252,18 @@ def _infer_frequency(df, freq, min_freq_percentage=0.7):
if argmax_frequency in MONTHLY_FREQUENCIES:
dominant_freq_percentage = _get_dominant_frequency_percentage(frequencies, distribution, MONTHLY_FREQUENCIES)
num_freq = 2.6784e15
inferred_freq = "MS" if pd.to_datetime(df["ds"].iloc[0]).day < 15 else "M"
inferred_freq = "MS" if pd.to_datetime(df["ds"].iloc[0]).day < 15 else "ME"
# exception - yearly df (365 days freq or 366 days freq)
elif argmax_frequency == 3.1536e16 or argmax_frequency == 3.16224e16:
dominant_freq_percentage = get_dist_considering_two_freqs(distribution) / len(df["ds"])
num_freq = 3.1536e16
inferred_freq = "YS" if pd.to_datetime(df["ds"].iloc[0]).day < 15 else "Y"
inferred_freq = "YS" if pd.to_datetime(df["ds"].iloc[0]).day < 15 else "YE"
# exception - quarterly df (most common == 92 days - 3rd,4th quarters and second most common == 91 days 2nd quarter
# and 1st quarter in leap year)
elif argmax_frequency == 7.9488e15 and frequencies[np.argsort(distribution, axis=0)[-2]] == 7.8624e15:
dominant_freq_percentage = get_dist_considering_two_freqs(distribution) / len(df["ds"])
num_freq = 7.9488e15
inferred_freq = "QS" if pd.to_datetime(df["ds"].iloc[0]).day < 15 else "Q"
inferred_freq = "QS" if pd.to_datetime(df["ds"].iloc[0]).day < 15 else "QE"
# exception - Business day (most common == day delta and second most common == 3 days delta and second most common
# is at least 12% of the deltas)
elif (
Expand All @@ -1283,7 +1283,7 @@ def _infer_frequency(df, freq, min_freq_percentage=0.7):
):
dominant_freq_percentage = get_dist_considering_two_freqs(distribution) / len(df["ds"])
num_freq = 3.6e12
inferred_freq = "BH"
inferred_freq = "bh"
else:
dominant_freq_percentage = distribution.max() / len(df["ds"])
num_freq = argmax_frequency # get value of most common diff
Expand Down
Loading
Loading