diff --git a/.github/workflows/draft-pdf.yml b/.github/workflows/draft-pdf.yml new file mode 100644 index 0000000..08a5ede --- /dev/null +++ b/.github/workflows/draft-pdf.yml @@ -0,0 +1,23 @@ +on: [push] + +jobs: + paper: + runs-on: ubuntu-latest + name: Paper Draft + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Build draft PDF + uses: openjournals/openjournals-draft-action@master + with: + journal: joss + # This should be the path to the paper within your repo. + paper-path: paper.md + - name: Upload + uses: actions/upload-artifact@v1 + with: + name: paper + # This is the output path where Pandoc will write the compiled + # PDF. Note, this should be the same directory as the input + # paper.md + path: paper.pdf \ No newline at end of file diff --git a/Makefile b/Makefile index 232b006..9bae6a1 100644 --- a/Makefile +++ b/Makefile @@ -135,6 +135,9 @@ render: ## run R markdown file in /vignettes, open rendered HTML Rscript -e "rmarkdown::render(paste0('./vignettes/', '$$filename', '.Rmd'))"; \ python3 -c "$$BROWSER_PYSCRIPT" "$$filename.html" +render-paper: + Rscript -e "rmarkdown::render('./paper-v2.md')" + setwd: ## set working directory to current directory Rscript -e "setwd('.')" diff --git a/NAMESPACE b/NAMESPACE index 6e889be..a5a1080 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -42,6 +42,7 @@ export("ridge2f") export("ridge") export("varf") export("createtrendseason") +export("get_error") export("getreturns") export("getsimulations") export("removenas") @@ -54,6 +55,4 @@ export(fitdistr_ahead) export(quantile_scp) export(genericforecast) export(conformalize) -export(mlf) -export(glmthetaf) -export(computeattention) \ No newline at end of file +export(mlf) \ No newline at end of file diff --git a/R/compute_attention.R b/R/compute_attention.R deleted file mode 100644 index 52ea105..0000000 --- a/R/compute_attention.R +++ /dev/null @@ -1,41 +0,0 @@ -#' Compute global attention weights and context vectors for time series -#' -#' @param series Numeric vector containing the time series of length n -#' @return List containing: -#' \item{attention_weights}{n × n matrix where entry (i,j) represents the attention -#' weight of time j on time i. Only entries j <= i are non-zero (causal attention).} -#' \item{context_vectors}{Vector of length n where each entry i is the weighted sum -#' of all values up to time i, using the attention weights.} -#' @examples -#' # For a series of length 5 -#' series <- c(1, 2, 3, 4, 5) -#' result <- computeattention(series) -#' -#' # attention_weights will be 5x5 matrix -#' # context_vectors will be length 5 -#' dim(result$attention_weights) # [1] 5 5 -#' length(result$context_vectors) # [1] 5 -#' @export -computeattention <- function(series) { - # Input validation - if (!is.numeric(series)) stop("series must be numeric") - - # Compute attention weights using Rcpp function - attention_weights <- compute_attention_cpp(series) - - # Compute context vectors using Rcpp function - context_vectors <- compute_context_vectors_cpp(series, attention_weights) - - # Add dimension names for clarity - n <- length(series) - dimnames(attention_weights) <- list( - paste0("t", 1:n), - paste0("t", 1:n) - ) - names(context_vectors) <- paste0("t", 1:n) - - return(list( - attention_weights = attention_weights, - context_vectors = context_vectors - )) -} \ No newline at end of file diff --git a/R/glmtheta.R b/R/glmtheta.R deleted file mode 100644 index 3aa48d3..0000000 --- a/R/glmtheta.R +++ /dev/null @@ -1,287 +0,0 @@ -#' @title Generalized Linear Model Theta Forecast -#' @description This function implements the Theta method using a Generalized Linear Model (GLM) -#' @param y The time series data -#' @param h The number of periods to forecast -#' @param level The confidence level for the forecast intervals -#' @param fit_func The function to use for fitting the GLM -#' @param fan Logical flag for fan plot -#' @param x The time series data -#' @param attention Logical flag for using attention mechanism -#' @param scale_ctxt Scaling coefficient for context vector -#' @param ... Additional arguments to pass to the fit_func -#' @return A forecast object -#' @export -glmthetaf <- function (y, - h = ifelse(frequency(y) > 1, 2 * frequency(y), 10), - level = 95, - fit_func = stats::glm, - fan = FALSE, - x = y, - type_pi = c( - "conformal-split", - "gaussian" - ), - attention = TRUE, - scale_ctxt = 1, - method = c("adj", "base"), - ...) -{ - method <- match.arg(method) - type_pi <- match.arg(type_pi) - stopifnot(scale_ctxt > 0 && scale_ctxt <= 1) - if (grepl("conformal", type_pi) >= 1) # not conformal - { - stopifnot(length(level) == 1) - freq_x <- frequency(y) - start_x <- start(y) - start_preds <- tsp(y)[2] + 1 / freq_x - # Split the training data - y_train_calibration <- splitts(y, split_prob=0.5) - y_train <- y_train_calibration$training - y_calibration <- y_train_calibration$testing - h_calibration <- length(y_calibration) - # Get predictions on calibration set - y_pred_calibration <- ahead::glmthetaf( - y_train, - h = h_calibration, - fit_func = fit_func, - attention = attention, - method = method, - type_pi = "gaussian" - )$mean - # Final fit and forecast on full calibration set - fit_obj_train <- ahead::glmthetaf( - y_calibration, - h = h, - fit_func = fit_func, - attention = attention, - method = method, - type_pi = "gaussian" - ) - preds <- fit_obj_train$mean - calibrated_residuals <- y_calibration - y_pred_calibration - scaled_calibrated_residuals <- base::scale(calibrated_residuals) - sd_calibrated_residuals <- sd(calibrated_residuals) - - if (type_pi == "conformal-split") { - quantile_absolute_residuals_conformal <- quantile(abs(scaled_calibrated_residuals), - probs = level/100) - # Create output with proper time series attributes - out <- list( - mean = ts(preds, start = start_preds, frequency = freq_x), - lower = ts(preds - sd_calibrated_residuals*quantile_absolute_residuals_conformal, - start = start_preds, frequency = freq_x), - upper = ts(preds + sd_calibrated_residuals*quantile_absolute_residuals_conformal, - start = start_preds, frequency = freq_x), - sims = NULL, - x = y, - level = level, - method = "ConformalTheta", - residuals = ts(calibrated_residuals, - start = start(y), - frequency = freq_x) - ) - return(structure(out, class = "forecast")) - } - } - - if (fan) { - level <- seq(51, 99, by = 3) - } - else { - if (min(level) > 0 && max(level) < 1) { - level <- 100 * level - } - else if (min(level) < 0 || max(level) > 99.99) { - stop("Confidence limit out of range") - } - } - n <- length(x) - x <- as.ts(x) - m <- frequency(x) - if (m > 1 && !is.constant(x) && n > 2 * m) { - r <- as.numeric(acf(x, lag.max = m, plot = FALSE)$acf)[-1] - stat <- sqrt((1 + 2 * sum(r[-m]^2))/n) - seasonal <- (abs(r[m])/stat > qnorm(0.95)) - } - else { - seasonal <- FALSE - } - origx <- x - if (seasonal) { - decomp <- decompose(x, type = "multiplicative") - if (any(abs(seasonal(decomp)) < 1e-04)) { - warning("Seasonal indexes close to zero. Using non-seasonal Theta method") - } - else { - x <- seasadj(decomp) - } - } - - fcast <- ses(x, h = h) - alpha <- pmax(1e-10, fcast$model$par["alpha"]) - - # Try formula interface first, then matrix interface - time_idx <- 0:(n - 1) - df <- data.frame(y = x, t = time_idx) - - if (attention == FALSE) { - tmp2 <- try(fit_func(y ~ t, data = df, ...), silent = TRUE) - if (inherits(tmp2, "try-error")) { - # For matrix interface, include intercept term for methods like glmnet - X <- matrix(time_idx, nrow=1) - tmp2 <- try(fit_func(x = X, y = as.numeric(x), ...), silent = TRUE) - if (inherits(tmp2, "try-error")) { - stop("Unable to fit linear trend") - } - } - # Extract coefficient using generic method first, then fallback - slope <- try(coef(tmp2)[2], silent = TRUE) - if (inherits(slope, "try-error")) { - slope <- try(tmp2$coefficients[2], silent = TRUE) - if (inherits(slope, "try-error")) { - slope <- try(tmp2$coef[2], silent = TRUE) - if (inherits(slope, "try-error")) { - stop("Unable to extract slope coefficient") - } - } - } - tmp2 <- slope/2 # Divide by 2 as per theta method - fcast$mean <- fcast$mean + tmp2 * (0:(h - 1) + (1 - (1 - - alpha)^n)/alpha) - if (seasonal) { - fcast$mean <- fcast$mean * rep(tail(decomp$seasonal, - m), trunc(1 + h/m))[1:h] - fcast$fitted <- fcast$fitted * decomp$seasonal - } - fcast$residuals <- origx - fcast$fitted - fcast.se <- sqrt(fcast$model$sigma2) * sqrt((0:(h - 1)) * - alpha^2 + 1) - nconf <- length(level) - fcast$lower <- fcast$upper <- ts(matrix(NA, nrow = h, ncol = nconf)) - tsp(fcast$lower) <- tsp(fcast$upper) <- tsp(fcast$mean) - for (i in 1:nconf) { - zt <- -qnorm(0.5 - level[i]/200) - fcast$lower[, i] <- fcast$mean - zt * fcast.se - fcast$upper[, i] <- fcast$mean + zt * fcast.se - } - fcast$x <- origx - fcast$level <- level - fcast$method <- "Theta" - fcast$model <- list(alpha = alpha, drift = tmp2, sigma = fcast$model$sigma2) - fcast$model$call <- match.call() - return(fcast) - } else { # attention is TRUE - - if (method == "base") - { - #misc::debug_print(method) - df <- cbind.data.frame(df, ctx = ahead::computeattention(x)$context_vectors) - tmp2 <- try(fit_func(y ~ ., data = df, ...), silent = TRUE) - if (inherits(tmp2, "try-error")) { - X <- cbind.data.frame(time_idx, df$ctx) - tmp2 <- try(fit_func(x = as.matrix(X), y = as.numeric(x), ...), silent = TRUE) - if (inherits(tmp2, "try-error")) { - stop("Unable to fit linear trend with attention") - } - } - } else { # method == "adj": same as attention == FALSE to avoid adjusting the trend twice - #misc::debug_print(method) - tmp2 <- try(fit_func(y ~ t, data = df, ...), silent = TRUE) - if (inherits(tmp2, "try-error")) { - # For matrix interface, include intercept term for methods like glmnet - X <- matrix(time_idx, nrow=1) - tmp2 <- try(fit_func(x = X, y = as.numeric(x), ...), silent = TRUE) - if (inherits(tmp2, "try-error")) { - stop("Unable to fit linear trend") - } - } - # Extract coefficient using generic method first, then fallback - slope <- try(coef(tmp2)[2], silent = TRUE) - if (inherits(slope, "try-error")) { - slope <- try(tmp2$coefficients[2], silent = TRUE) - if (inherits(slope, "try-error")) { - slope <- try(tmp2$coef[2], silent = TRUE) - if (inherits(slope, "try-error")) { - stop("Unable to extract slope coefficient") - } - } - } - tmp2 <- slope/2 # Divide by 2 as per theta method - } - } - - if (method == 'base') - { - context_vectors <- log(ahead::computeattention(x)$context_vectors) - last_context <- tail(context_vectors, 1) - time_coef <- try(coef(tmp2)[2], silent = TRUE) - ctx_coef <- try(coef(tmp2)[3], silent = TRUE) - if (inherits(time_coef, "try-error") || inherits(ctx_coef, "try-error")) { - time_coef <- try(tmp2$coefficients[2], silent = TRUE) - ctx_coef <- try(tmp2$coefficients[3], silent = TRUE) - } - tmp2 <- time_coef/2 # Only divide time coefficient by 2 as per theta method - ctx_effect <- ctx_coef # Keep context coefficient as is - context_vectors <- log(ahead::computeattention(x)$context_vectors) - # Apply modified drift to forecast - fcast$mean[1] <- fcast$mean[1] + tmp2 * ((1-(1-alpha)^n)/alpha) + ctx_effect*context_vectors - newx <- c(y, fcast$mean[1]) - for (i in 2:h) - { - context_vectors <- log(ahead::computeattention(newx)$context_vectors) - # Modify drift based on context - fcast$mean[i] <- fcast$mean[i] + tmp2 * ((i - 1) + (1-(1-alpha)^n)/alpha) + ctx_effect*context_vectors - newx <- c(newx, fcast$mean[i]) - } - - - } else { # method == "adj" - context_vectors <- ahead::computeattention(x)$context_vectors - last_context <- tail(context_vectors, 1) - # Modify drift based on context - context_adjusted_drift <- tmp2 * (1 + scale_ctxt * sign(last_context) * - abs(last_context / mean(abs(y)))) - # Apply modified drift to forecast - fcast$mean[1] <- fcast$mean[1] + context_adjusted_drift * ((1-(1-alpha)^n)/alpha) - newx <- c(y, fcast$mean[1]) - for (i in 2:h) - { - context_vectors <- ahead::computeattention(newx)$context_vectors - last_context <- tail(context_vectors, 1) - # Modify drift based on context - context_adjusted_drift <- tmp2 * (1 + scale_ctxt * sign(last_context) * - abs(last_context / mean(abs(newx)))) - fcast$mean[i] <- fcast$mean[i] + context_adjusted_drift * ((i - 1) + (1-(1-alpha)^n)/alpha) - newx <- c(newx, fcast$mean[i]) - } - } - - if (seasonal) { - fcast$mean <- fcast$mean * rep(tail(decomp$seasonal, - m), trunc(1 + h/m))[1:h] - fcast$fitted <- fcast$fitted * decomp$seasonal - } - fcast$residuals <- origx - fcast$fitted - fcast.se <- sqrt(fcast$model$sigma2) * sqrt((0:(h - 1)) * - alpha^2 + 1) - nconf <- length(level) - fcast$lower <- fcast$upper <- ts(matrix(NA, nrow = h, ncol = nconf)) - tsp(fcast$lower) <- tsp(fcast$upper) <- tsp(fcast$mean) - for (i in 1:nconf) { - zt <- -qnorm(0.5 - level[i]/200) - fcast$lower[, i] <- fcast$mean - zt * fcast.se - fcast$upper[, i] <- fcast$mean + zt * fcast.se - } - fcast$x <- origx - fcast$level <- level - fcast$method <- "Theta" - fcast$model <- list(alpha = alpha, drift = tmp2, sigma = fcast$model$sigma2) - fcast$model$call <- match.call() - return(fcast) -} - - - - - diff --git a/man/computeattention.Rd b/man/computeattention.Rd deleted file mode 100644 index f597ed3..0000000 --- a/man/computeattention.Rd +++ /dev/null @@ -1,31 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/compute_attention.R -\name{computeattention} -\alias{computeattention} -\title{Compute global attention weights and context vectors for time series} -\usage{ -computeattention(series) -} -\arguments{ -\item{series}{Numeric vector containing the time series of length n} -} -\value{ -List containing: -\item{attention_weights}{n × n matrix where entry (i,j) represents the attention -weight of time j on time i. Only entries j <= i are non-zero (causal attention).} -\item{context_vectors}{Vector of length n where each entry i is the weighted sum -of all values up to time i, using the attention weights.} -} -\description{ -Compute global attention weights and context vectors for time series -} -\examples{ -# For a series of length 5 -series <- c(1, 2, 3, 4, 5) -result <- computeattention(series) - -# attention_weights will be 5x5 matrix -# context_vectors will be length 5 -dim(result$attention_weights) # [1] 5 5 -length(result$context_vectors) # [1] 5 -} diff --git a/man/glmthetaf.Rd b/man/glmthetaf.Rd deleted file mode 100644 index 500b9e3..0000000 --- a/man/glmthetaf.Rd +++ /dev/null @@ -1,45 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/glmtheta.R -\name{glmthetaf} -\alias{glmthetaf} -\title{Generalized Linear Model Theta Forecast} -\usage{ -glmthetaf( - y, - h = ifelse(frequency(y) > 1, 2 * frequency(y), 10), - level = 95, - fit_func = stats::glm, - fan = FALSE, - x = y, - type_pi = c("conformal-split", "gaussian"), - attention = TRUE, - scale_ctxt = 1, - method = c("adj", "base"), - ... -) -} -\arguments{ -\item{y}{The time series data} - -\item{h}{The number of periods to forecast} - -\item{level}{The confidence level for the forecast intervals} - -\item{fit_func}{The function to use for fitting the GLM} - -\item{fan}{Logical flag for fan plot} - -\item{x}{The time series data} - -\item{attention}{Logical flag for using attention mechanism} - -\item{scale_ctxt}{Scaling coefficient for context vector} - -\item{...}{Additional arguments to pass to the fit_func} -} -\value{ -A forecast object -} -\description{ -This function implements the Theta method using a Generalized Linear Model (GLM) -} diff --git a/paper.md b/paper.md new file mode 100644 index 0000000..1339531 --- /dev/null +++ b/paper.md @@ -0,0 +1,252 @@ +--- +title: 'ahead: Univariate and multivariate time series forecasting with uncertainty quantification including simulation approaches' +tags: +- Python +- R +- Julia +- Time series +- Forecasting +- Machine Learning +- Uncertainty +date: "March 25, 2025" +output: + pdf_document: default + word_document: default + html_document: + df_print: paged +authors: +- name: Thierry Moudiki + orcid: "0000-0002-9422-5459" + affiliation: 1 +affiliations: +- name: "Techtonique LLC" + index: 1 +bibliography: biblio.bib +--- + +# Summary + +`ahead` contains multiple time series forecasting models, and provides uncertainty quantification through parametric and non-parametric methods, including simulation-based approaches. This paper presents two original Machine Learning models implemented in R version of `ahead` package for forecasting univariate and multivariate time series: `dynrmf`, an autoregressive model that can utilize any Machine Learning model for forecasting univariate time series, and `ridge2f` that extends ridge regression with two regularization parameters and a hidden layer for producing nonlinear outputs. + +# Statement of need + +Forecasting multivariate time series (MTS hereafter, and including the univariate case) is important for business planning and decision support in finance (stress-testing with financial scenarios), insurance (reserving and required capital valuation), and other industries such as *Energy* (load anticipation) and meteorology. One can obtain point forecasts from a statistical/Machine Learning (ML) model, but these point forecasts are generally of limited importance to analysts. What matters more is the model's ability to quantify the uncertainty around its predictions. + +There are multiple MTS forecasting models available in [\textsf{R} package](https://github.com/Techtonique/ahead) `ahead`'s version `0.18.0` ([\textsf{Python}](https://github.com/Techtonique/ahead_python) and [\textsf{Julia}](https://github.com/Techtonique/Ahead.jl) implementations, following \textsf{R}'s API as closely as possible). R version of `ahead` itself is available at [r-packages.techtonique.net](https://r-packages.techtonique.net/), and distributed across all major operating systems. + +`ahead`'s models generally include parametric prediction intervals alongside non-parametric ones, simulation-based uncertainty quantification techniques. This paper describes **two** of these ML models, **not available in any other statistical software**: + +* \mbox{\texttt{dynrmf}}; an autoregressive dynamic model inspired by **N**eural **N**etwork **A**uto**r**egression (NNAR) (from @hyndman2013forecasting), and described with more details in [https://otexts.com/fpp2/nnetar.html#neural-network-autoregression](https://otexts.com/fpp2/nnetar.html#neural-network-autoregression). As NNAR, \mbox{\texttt{dynrmf}} does an automatic choice of the number of autoregressive and seasonal time series lags. \mbox{\texttt{dynrmf}} is however more generic, and __can use any ML model__ (the default model is [Ridge Regression](https://en.wikipedia.org/wiki/Ridge_regression)). + +* \mbox{\texttt{ahead::ridge2f}} (@moudiki2018multiple) implements a __quasi-randomized *neural* networks__ model extending Ridge_regression to 2 regularization parameters, and capable of producing nonlinear outputs thanks to the use of a *hidden layer*. Since its first publication in 2018, \mbox{\texttt{ahead::ridge2f}} has been enhanced for integrating predictive uncertainty quantification through the (independent/block) bootstrap (@efron1986bootstrap) and copulas' simulation(@brechmann2013modeling, @nagler2023vine). As of march 2025, conformal prediction which improves the quality of prediction intervals (see @vovk2005algorithmic) has been included. + +# Examples + +## Install `ahead` in \textsf{R} + +```r +options(repos = c( + techtonique = "https://r-packages.techtonique.net", + CRAN = "https://cloud.r-project.org" + )) +utils::install.packages("rmarkdown", repos = c(CRAN="https://cloud.r-project.org")) +utils::install.packages("remotes", repos = c(CRAN="https://cloud.r-project.org")) +utils::install.packages("forecast", repos = c(CRAN="https://cloud.r-project.org")) +utils::install.packages("fpp", repos = c(CRAN="https://cloud.r-project.org")) +utils::install.packages("ggplot2", repos = c(CRAN="https://cloud.r-project.org")) +utils::install.packages("e1071", repos = c(CRAN="https://cloud.r-project.org")) +utils::install.packages("randomForest", repos = c(CRAN="https://cloud.r-project.org")) +utils::install.packages("dfoptim", repos = c(CRAN="https://cloud.r-project.org")) +utils::install.packages("ahead") +``` + +```r +library(ahead) +library(forecast) +library(ggplot2) +library(randomForest) +library(e1071) +``` + +## Use `ahead::ridge2f` + +### Use `ahead::ridge2f` for univariate time series forecasting + +In all the examples presented below, default hyperparameters are used: 5 nodes in the hidden layer (see @moudiki2018multiple for more details) and a ReLU activation function (see @goodfellow2016deep) are used. + +The `fdeaths` data set below contains **monthly deaths of females from various diseases in the UK, 1974-1979**. Here's how to obtain 20-steps-ahead forecasts for `fdeaths` with `ahead::ridge2f`; including seasonality terms. The default level for the prediction interval is equal to 95%. + +```r +x <- fdeaths # input dataset +xreg <- ahead::createtrendseason(x) # add seasonality and trend +z <- ahead::ridge2f(x, xreg = xreg, h=20L) # forecasting h-steps ahead +``` + +```r +ggplot2::autoplot(z) # plot forecast +``` + +![Prediction intervals for `ahead::ridge2f` model on mortality data](paper_files/paper_11_0.png) + + +`EuStocksLogReturns` contains the daily log-returns of major European stock indices, with 1860 observations. Only the first 100 dates of the DAX index dataset are used in the example below. + + +```r +data(EuStockMarkets) +EuStocks <- ts(EuStockMarkets[1:100, ], + start = start(EuStockMarkets), + frequency = frequency(EuStockMarkets)) # original data +EuStocksLogReturns <- ahead::getreturns(EuStocks, type = "log") # obtain log-returns +res <- ahead::ridge2f(EuStocksLogReturns[, "DAX"], h = 20L, +type_pi = "movingblockbootstrap", + show_progress = FALSE) +``` + +```r +ggplot2::autoplot(res) # plot forecast +``` + +![Prediction intervals for `ahead::ridge2f` model on stocks log-returns](paper_files/paper_14_0.png) + + +### Use `ahead::dynrmf` for univariate time series forecasting + +`fdeaths` is used in this example too. The default model used by ahead::dynrmf is automated ridge regression, which automatically selects the regularization parameter using Leave-One-Out cross-validation (see @bergmeir2018note): + +**- Forecasting with `randomForest::randomForest`** + +```r +# Plotting forecasts +# With Random Forest regressor, horizon of 20, +# 95% prediction interval +fit_rf <- ahead::dynrmf(fdeaths, h=20, level=95, fit_func = randomForest::randomForest, + fit_params = list(ntree = 50), predict_func = predict) +``` + +```r +ggplot2::autoplot(fit_rf) +``` + +![Prediction intervals for `ahead::dynrmf` model on mortality data](paper_files/paper_17_0.png) + + +Check in-sample residuals: + +```r +forecast::checkresiduals(fit_rf) +``` + + + Ljung-Box test + + data: Residuals from DynRM 1,1[12] + Q* = 9.8649, df = 12, p-value = 0.6278 + + Model df: 0. Total lags used: 12 + + +![Residuals diagnostics for `ahead::dynrmf` model on mortality data](paper_files/paper_19_1.png) + + +**- Forecasting with `e1071::svm`** (Support Vector Machines) + +```r +# With Support Vector Machine regressor, horizon of 20, +# 95% prediction interval +fit_svm <- ahead::dynrmf(fdeaths, h=20, level=95, fit_func = e1071::svm, +fit_params = list(kernel = "linear"), predict_func = predict) +``` + +```r +ggplot2::autoplot(fit_svm) +``` + +![Prediction intervals for `ahead::dynrmf` model on mortality data (using Support Vector Machines regression)](paper_files/paper_22_0.png) + + +Check in-sample residuals: + +```r +forecast::checkresiduals(fit_svm) +``` + + + Ljung-Box test + + data: Residuals from DynRM 1,1[12] + Q* = 27.351, df = 12, p-value = 0.006875 + + Model df: 0. Total lags used: 12 + +![Residuals diagnostics for `ahead::dynrmf` model on mortality data (using Support Vector Machines regression)](paper_files/paper_24_1.png) + +**- Use of an external regressor** (trend) + +The `AirPassengers` dataset has been widely tested as a specialized benchmark in forecasting literature because it displays trend, seasonality, and heteroskedasticity. + +```r +h <- 20L # forecasting horizon +res6 <- ahead::dynrmf(AirPassengers, +xreg_fit = 1:length(AirPassengers), +xreg_predict = (length(AirPassengers)+1):(length(AirPassengers)+h), +h=h) +ggplot2::autoplot(res6) +``` + +![Prediction intervals for `ahead::dynrmf` model on `AirPassengers`](paper_files/paper_26_0.png) + + +### `ahead::ridge2f` for MTS forecasting + +The `insurance` dataset (@hyndman2013forecasting) contains monthly quotations and television advertising expenditure for a US insurance company from January 2002 to April 2005. Fast calibration of `ahead::ridge2f` relies on generalized leave-one-out cross-validation as it will be shown in the following \textsf{R} example. It's worth mentioning that **only the 2 regularization parameters are calibrated** here. Other model's hyperparameters such as the number of time series lags or the number of nodes in the hidden layer are set to their default values (respectively `1` and `5`). + + +```r +objective_function <- function(xx) +{ + ahead::loocvridge2f(fpp::insurance, + h = 20L, + type_pi="blockbootstrap", + lambda_1=10^xx[1], + lambda_2=10^xx[2], + show_progress = FALSE, + )$loocv +} +start <- proc.time()[3] +(opt <- dfoptim::nmkb(fn=objective_function, + lower=c(-10,-10), + upper=c(10,10), + par=c(0.1, 0.1))) +print(proc.time()[3]-start) +``` + +**Forecasting using the _optimal_ regularization parameters** + +The `plot` method (an [S3 method](https://cran.r-project.org/doc/manuals/R-exts.html#Registering-S3-methods)) from `ahead` is used to visualize the predictive simulations and prediction intervals for the `Quotes` and `TV.advert` time series. + +```r +start <- proc.time()[3] +res <- ahead::ridge2f(fpp::insurance, h = 20L, + type_pi="blockbootstrap", + B = 100L, # number of predictive simulations + lambda_1=10^opt$par[1], # 'optimal' parameters + lambda_2=10^opt$par[2]) # 'optimal' parameters +print(proc.time()[3]-start) + + +par(mfrow=c(2, 2)) +plot(res, "Quotes", type = "sims", +main = "predictive simulations") +plot(res, "TV.advert", type = "sims", +main = "predictive simulations") +plot(res, "Quotes", type = "dist", +main = "prediction intervals") +plot(res, "TV.advert", type = "dist", +main = "prediction intervals") +``` + +![Probabilistic (through simulation) Multivariate forecasting using `ahead::ridge2f`](paper_files/paper_30_1.png) + + +# Citations diff --git a/paper_files/paper_11_0.png b/paper_files/paper_11_0.png new file mode 100644 index 0000000..d25e8d2 Binary files /dev/null and b/paper_files/paper_11_0.png differ diff --git a/paper_files/paper_14_0.png b/paper_files/paper_14_0.png new file mode 100644 index 0000000..c7ee207 Binary files /dev/null and b/paper_files/paper_14_0.png differ diff --git a/paper_files/paper_17_0.png b/paper_files/paper_17_0.png new file mode 100644 index 0000000..a31fa10 Binary files /dev/null and b/paper_files/paper_17_0.png differ diff --git a/paper_files/paper_19_1.png b/paper_files/paper_19_1.png new file mode 100644 index 0000000..dc55c2e Binary files /dev/null and b/paper_files/paper_19_1.png differ diff --git a/paper_files/paper_22_0.png b/paper_files/paper_22_0.png new file mode 100644 index 0000000..254db98 Binary files /dev/null and b/paper_files/paper_22_0.png differ diff --git a/paper_files/paper_24_1.png b/paper_files/paper_24_1.png new file mode 100644 index 0000000..14bbc3e Binary files /dev/null and b/paper_files/paper_24_1.png differ diff --git a/paper_files/paper_26_0.png b/paper_files/paper_26_0.png new file mode 100644 index 0000000..5f87521 Binary files /dev/null and b/paper_files/paper_26_0.png differ diff --git a/paper_files/paper_30_1.png b/paper_files/paper_30_1.png new file mode 100644 index 0000000..6223676 Binary files /dev/null and b/paper_files/paper_30_1.png differ diff --git a/src/RcppExports.o b/src/RcppExports.o index 3d74402..996c977 100644 Binary files a/src/RcppExports.o and b/src/RcppExports.o differ diff --git a/src/ahead.so b/src/ahead.so index 8090cc5..5da73d8 100755 Binary files a/src/ahead.so and b/src/ahead.so differ diff --git a/src/compute_attention.o b/src/compute_attention.o index 0794b2d..766e227 100644 Binary files a/src/compute_attention.o and b/src/compute_attention.o differ diff --git a/src/embedc.o b/src/embedc.o index a783624..664e76b 100644 Binary files a/src/embedc.o and b/src/embedc.o differ diff --git a/src/utils_armagarch.o b/src/utils_armagarch.o index 1370592..f97977b 100644 Binary files a/src/utils_armagarch.o and b/src/utils_armagarch.o differ diff --git a/src/utils_matrix.o b/src/utils_matrix.o index e12fc8b..c1c3b13 100644 Binary files a/src/utils_matrix.o and b/src/utils_matrix.o differ diff --git a/tests/testthat/Rplots.pdf b/tests/testthat/Rplots.pdf deleted file mode 100644 index 02bd878..0000000 Binary files a/tests/testthat/Rplots.pdf and /dev/null differ diff --git a/biblio.bib b/vignettes/biblio.bib similarity index 100% rename from biblio.bib rename to vignettes/biblio.bib diff --git a/vignettes/glmthetaf-attention-conformal.Rmd b/vignettes/glmthetaf-attention-conformal.Rmd deleted file mode 100644 index 601b84b..0000000 --- a/vignettes/glmthetaf-attention-conformal.Rmd +++ /dev/null @@ -1,73 +0,0 @@ ---- -title: "Generalized Linear Model Theta Forecast with attention - conformal uncertainty" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Generalized Linear Model Theta Forecast with attention - conformal uncertainty} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -# USAccDeaths (method='adj') - - -```{r fig.width=7.5} -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=MASS::glm.nb, attention = TRUE, type_pi = "conformal-split", method="adj"))) -plot(obj) - -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=stats::glm, attention = TRUE, type_pi = "conformal-split", method="adj"))) -plot(obj) - -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=MASS::rlm, attention = TRUE, type_pi = "conformal-split", method="adj"))) -plot(obj) - -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=MASS::lqs, attention = TRUE, type_pi = "conformal-split", method="adj"))) -plot(obj) -``` - -```{r fig.width=7.5} -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=stats::lm, attention = TRUE, type_pi = "conformal-split", method="adj"))) -plot(obj) -``` - -```{r fig.width=7.5} -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=gam::gam, attention = TRUE, type_pi = "conformal-split", method="adj"))) -plot(obj) -``` - -```{r fig.width=7.5} -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=quantreg::rq, attention = TRUE, type_pi = "conformal-split", method="adj"))) -plot(obj) -``` - -# AirPassengers (method='adj') - -```{r fig.width=5.5} -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=MASS::glm.nb, attention = TRUE, type_pi = "conformal-split", method="adj"))) -plot(obj) - -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=stats::glm, attention = TRUE, type_pi = "conformal-split", method="adj"))) -plot(obj) - -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=MASS::rlm, attention = TRUE, type_pi = "conformal-split", method="adj"))) -plot(obj) -``` - -```{r fig.width=5.5} -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=stats::lm, attention = TRUE, type_pi = "conformal-split", method="adj"))) -plot(obj) -``` - -```{r fig.width=5.5} -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=MASS::lqs, attention = TRUE, type_pi = "conformal-split", method="adj"))) -plot(obj) -``` - -```{r fig.width=5.5} -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=gam::gam, attention = TRUE, type_pi = "conformal-split", method="adj"))) -plot(obj) -``` - -```{r fig.width=5.5} -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=quantreg::rq, attention = TRUE, type_pi = "conformal-split", method="adj"))) -plot(obj) -``` diff --git a/vignettes/glmthetaf-attention.Rmd b/vignettes/glmthetaf-attention.Rmd deleted file mode 100644 index bfc356c..0000000 --- a/vignettes/glmthetaf-attention.Rmd +++ /dev/null @@ -1,142 +0,0 @@ ---- -title: "Generalized Linear Model Theta Forecast with attention" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Generalized Linear Model Theta Forecast with attention} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -# USAccDeaths (method="adj") - -```{r fig.width=7.5} -library(forecast) -library(ahead) - -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=MASS::glm.nb, attention = TRUE, method = "adj"))) -plot(obj) - -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=stats::glm, attention = TRUE, method = "adj"))) -plot(obj) - -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=MASS::rlm, attention = TRUE, method = "adj"))) -plot(obj) - -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=MASS::lqs, attention = TRUE, method = "adj"))) -plot(obj) -``` - -```{r fig.width=7.5} -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=stats::lm, attention = TRUE, method = "adj"))) -plot(obj) -``` - -```{r fig.width=7.5} -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=gam::gam, attention = TRUE, method = "adj"))) -plot(obj) -``` - -```{r fig.width=7.5} -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=quantreg::rq, attention = TRUE, method = "adj"))) -plot(obj) -``` - - -# AirPassengers (method="adj") - -```{r fig.width=5.5} -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=MASS::glm.nb, attention = TRUE, method = "adj"))) -plot(obj) - -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=stats::glm, attention = TRUE, method = "adj"))) -plot(obj) - -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=MASS::rlm, attention = TRUE, method = "adj"))) -plot(obj) -``` - -```{r fig.width=5.5} -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=stats::lm, attention = TRUE, method = "adj"))) -plot(obj) -``` - -```{r fig.width=5.5} -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=MASS::lqs, attention = TRUE, method = "adj"))) -plot(obj) -``` - -```{r fig.width=5.5} -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=gam::gam, attention = TRUE, method = "adj"))) -plot(obj) -``` - -```{r fig.width=5.5} -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=quantreg::rq, attention = TRUE, method = "adj"))) -plot(obj) -``` - - -# USAccDeaths (method='adj') - - -```{r fig.width=7.5} -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=MASS::glm.nb, attention = TRUE, method="adj"))) -plot(obj) - -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=stats::glm, attention = TRUE, method="adj"))) -plot(obj) - -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=MASS::rlm, attention = TRUE, method="adj"))) -plot(obj) - -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=MASS::lqs, attention = TRUE, method="adj"))) -plot(obj) -``` - -```{r fig.width=7.5} -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=stats::lm, attention = TRUE, method="adj"))) -plot(obj) -``` - -```{r fig.width=7.5} -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=gam::gam, attention = TRUE, method="adj"))) -plot(obj) -``` - -```{r fig.width=7.5} -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=quantreg::rq, attention = TRUE, method="adj"))) -plot(obj) -``` - -# AirPassengers (method='adj') - -```{r fig.width=5.5} -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=MASS::glm.nb, attention = TRUE, method="adj"))) -plot(obj) - -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=stats::glm, attention = TRUE, method="adj"))) -plot(obj) - -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=MASS::rlm, attention = TRUE, method="adj"))) -plot(obj) -``` - -```{r fig.width=5.5} -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=stats::lm, attention = TRUE, method="adj"))) -plot(obj) -``` - -```{r fig.width=5.5} -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=MASS::lqs, attention = TRUE, method="adj"))) -plot(obj) -``` - -```{r fig.width=5.5} -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=gam::gam, attention = TRUE, method="adj"))) -plot(obj) -``` - -```{r fig.width=5.5} -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=quantreg::rq, attention = TRUE, method="adj"))) -plot(obj) -``` diff --git a/vignettes/glmthetaf-ridge-attention.Rmd b/vignettes/glmthetaf-ridge-attention.Rmd deleted file mode 100644 index bb1ff87..0000000 --- a/vignettes/glmthetaf-ridge-attention.Rmd +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: "Ridge Theta Forecast with attention - conformal uncertainty" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Ridge Theta Forecast with attention - conformal uncertainty} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -# USAccDeaths - - -```{r fig.width=7.5} -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=MASS::glm.nb, attention = TRUE, type_pi = "conformal-split", method="adj"))) -plot(obj) -``` - -# AirPassengers - -```{r fig.width=5.5} -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=MASS::glm.nb, attention = TRUE, type_pi = "conformal-split", method="adj"))) -``` diff --git a/vignettes/glmthetaf.Rmd b/vignettes/glmthetaf.Rmd deleted file mode 100644 index 494a007..0000000 --- a/vignettes/glmthetaf.Rmd +++ /dev/null @@ -1,83 +0,0 @@ ---- -title: "Generalized Linear Model Theta Forecast" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Generalized Linear Model Theta Forecast} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -# USAccDeaths - -```{r fig.width=7.5} -library(forecast) -library(ahead) - -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=MASS::glm.nb, attention=FALSE))) - -plot(obj) - -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=stats::glm, attention=FALSE))) -plot(obj) - -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=MASS::rlm, attention=FALSE))) -plot(obj) - -#(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=rvfl::rvfl, attention=FALSE))) -#plot(obj) - -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=MASS::lqs, attention=FALSE))) -plot(obj) -``` - -```{r fig.width=7.5} -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=stats::lm, attention=FALSE))) -plot(obj) -``` - -```{r fig.width=7.5} -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=gam::gam, attention=FALSE))) -plot(obj) -``` - -```{r fig.width=7.5} -(obj <- suppressWarnings(ahead::glmthetaf(USAccDeaths, h=25L, fit_func=quantreg::rq, attention=FALSE))) -plot(obj) -``` - -# AirPassengers - -```{r fig.width=5.5} -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=MASS::glm.nb, attention=FALSE))) -plot(obj) - -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=stats::glm, attention=FALSE))) -plot(obj) - -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=MASS::rlm, attention=FALSE))) -plot(obj) - -#(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=rvfl::rvfl, attention=FALSE))) -#plot(obj) -``` - -```{r fig.width=5.5} -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=stats::lm, attention=FALSE))) -plot(obj) -``` - -```{r fig.width=5.5} -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=MASS::lqs, attention=FALSE))) -plot(obj) -``` - -```{r fig.width=5.5} -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=gam::gam, attention=FALSE))) -plot(obj) -``` - -```{r fig.width=5.5} -(obj <- suppressWarnings(ahead::glmthetaf(AirPassengers, h=25L, fit_func=quantreg::rq, attention=FALSE))) -plot(obj) -``` -