diff --git a/NAMESPACE b/NAMESPACE index 25ee7a2..2ac8dd3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,4 +1,4 @@ # Generated by roxygen2: do not edit by hand export(bayesmetab) -import(R2jags) +import(jagsUI) diff --git a/R/bayesmetab.R b/R/bayesmetab.R index decfc64..dc66d6e 100644 --- a/R/bayesmetab.R +++ b/R/bayesmetab.R @@ -21,14 +21,11 @@ #' #' @return A dataframe and csv file of parameter estimates (mean, SD) and checks of model fit, plots of model fit (see Vignette for details https://github.com/dgiling/BASEmetab/blob/master/vignettes/BASEmetab.pdf). #' -#'@references Grace et al. (2015) Fast processing of diel oxygen curves: estimating stream metabolism with BASE (BAyesian Single-station Estimation). Limnology and Oceanography: Methods, 13, 103-114. +#' @references Grace et al. (2015) Fast processing of diel oxygen curves: estimating stream metabolism with BASE (BAyesian Single-station Estimation). Limnology and Oceanography: Methods, 13, 103-114. #' #' @author Darren Giling, Ralph Mac Nally #' @examples #' -#' ##Link to JAGS -#' library(R2jags) -#' #' ##View example data set. #' #set path to example data. #' data.dir <- system.file("extdata", package = "BASEmetab") @@ -47,38 +44,37 @@ #' results <- bayesmetab(data.dir, results.dir, interval=600) #' #' @export -#' @import R2jags +#' @import jagsUI -bayesmetab <- function(data.dir, results.dir, interval, n.iter=20000, n.burnin=n.iter*0.5, K.init = 2, - smooth.DO=0, smooth.PAR=FALSE, instant=FALSE, update.chains = TRUE, extra.iter=1, - K.est = TRUE, K.meas.mean = 0, K.meas.sd = 4, p.est=FALSE, theta.est=FALSE) - { +bayesmetab <- function(data.dir, results.dir, interval, n.iter=20000, n.burnin=n.iter*0.5, K.init = 2, + smooth.DO=0, smooth.PAR=FALSE, instant=FALSE, update.chains = TRUE, extra.iter=1, + K.est = TRUE, K.meas.mean = 0, K.meas.sd = 4, p.est=FALSE, theta.est=FALSE) { start.time<-NULL; start.time<-Sys.time() - + # model file model.dir <- system.file("tools", package = "BASEmetab") - + # define functions smooth5 <- function(x) (zoo::rollapply(x, 5, mean, na.rm=T,align="center")) # moving average of 5 time intervals - + # data input and set up output table dataframes - filenames<-list.files(file.path(data.dir)) - + filenames<-list.files(file.path(data.dir)) + # Set up output tables output.table<-NULL - output.table<-data.frame(File=character(), Date=character(), GPP.mean=double(), GPP.sd=double(), ER.mean=double(), ER.sd=double(), - K.mean=double(), K.sd=double(), theta.mean=double(), theta.sd=double(), A.mean=double(), A.sd=double(), p.mean=double(), p.sd=double(), + output.table<-data.frame(File=character(), Date=character(), GPP.mean=double(), GPP.sd=double(), ER.mean=double(), ER.sd=double(), + K.mean=double(), K.sd=double(), theta.mean=double(), theta.sd=double(), A.mean=double(), A.sd=double(), p.mean=double(), p.sd=double(), R2=double(), PPP=double(), rmse=double(), rmse.relative=double(), mrl.fraction=double(), ER.K.cor=double(), convergence.check=double(), A.Rhat=double(), - K.Rhat=double(), theta.Rhat=double(), p.Rhat=double(), R.Rhat=double(), GPP.Rhat=double(), DIC=double(), pD=double(), + K.Rhat=double(), theta.Rhat=double(), p.Rhat=double(), R.Rhat=double(), GPP.Rhat=double(), DIC=double(), pD=double(), stringsAsFactors=FALSE) - instant.rates<-data.frame(File=character(), Date=character(), interval=integer(), + instant.rates<-data.frame(File=character(), Date=character(), interval=integer(), tempC=double(), I=double(), K.instant=double(), GPP.instant=double(), ER.instant=double(), stringsAsFactors=FALSE) - + # Analyse files sequentially for (fname in filenames) { #fname <- filenames[1] - + data<-read.csv(file.path(data.dir,fname), head=T) # read next file seconds<-86400 req.rows <- 86400/interval @@ -86,13 +82,13 @@ bayesmetab <- function(data.dir, results.dir, interval, n.iter=20000, n.burnin=n x = 0:(N-1) ## checks - # check headers - if (! all(colnames(data) %in% c("Date","Time","I", "tempC", "DO.meas", "atmo.pressure", "salinity"))) { - stop(paste0("Column headers in input file '", fname, "' do not include 'Date', 'Time', 'I', 'tempC', 'DO.meas', 'atmo.pressure' and 'salinity' (they are case sensitive)")) } - - # check dates for "/" and replace with "-" - data$Date <- gsub("/", "-", data$Date) - + # check headers + if (! all(colnames(data) %in% c("Date","Time","I", "tempC", "DO.meas", "atmo.pressure", "salinity", "note"))) { + stop(paste0("Column headers in input file '", fname, "' do not include 'Date', 'Time', 'I', 'tempC', 'DO.meas', 'atmo.pressure', 'salinity' and 'note' (they are case sensitive)")) } + + # check dates for "/" and replace with "-" + data$Date <- gsub("/", "-", data$Date) + ## Smoothing data if(smooth.DO > 0) { # fast Fourier transform smoothing - low pass filter @@ -104,11 +100,11 @@ bayesmetab <- function(data.dir, results.dir, interval, n.iter=20000, n.burnin=n data$DO.smooth <- Re( fft( DO.fft_filtered, inverse=TRUE) / N ) noise <- data$DO.meas - data$DO.smooth } - + if(smooth.PAR == T) { data$I.smooth<-c(data$I[1:2],smooth5(data$I),data$I[nrow(data)-1],data$I[nrow(data)]) # moving average over 5 time intervals } - + # Select dates data$Date <- factor(data$Date, levels = unique(data$Date)) dates <- unique(data$Date) @@ -118,13 +114,13 @@ bayesmetab <- function(data.dir, results.dir, interval, n.iter=20000, n.burnin=n warning(paste0("Not all dates in file '", fname, "' contain ", req.rows, " rows")) } dates <- dates[n.records == (seconds/interval)] # select only dates with full days - + ## Analyse days sequentially - for (d in dates) - { + for (d in dates) + { #d <- dates[2] data.sub <- data[data$Date == d,] - + # Define data vectors num.measurements <- nrow(data.sub) tempC <- data.sub$tempC @@ -132,17 +128,17 @@ bayesmetab <- function(data.dir, results.dir, interval, n.iter=20000, n.burnin=n atmo.pressure <- data.sub$atmo.pressure DO.meas <- if(smooth.DO > 0) data.sub$DO.smooth else data.sub$DO.meas PAR <- if(smooth.PAR == TRUE) data.sub$I.smooth else data.sub$I - + # Initial values # Set these to something sensible if the model is becoming stuck in a bad parameter space - # These values here are expressed per timestep, not per day. Divide desired initial K (/day) by the number of timesteps in a day, as shown in default below + # These values here are expressed per timestep, not per day. Divide desired initial K (/day) by the number of timesteps in a day, as shown in default below inits <- function() { list(K = K.init / (86400/interval) ) } - + # Different random seeds kern=as.integer(runif(1000,min=1,max=10000)) iters=sample(kern,1) - - # Set + + # Set n.chains <- 3 n.thin <- 10 p.est.n <- as.numeric(p.est) @@ -150,122 +146,122 @@ bayesmetab <- function(data.dir, results.dir, interval, n.iter=20000, n.burnin=n K.est.n <- as.numeric(K.est) K.meas.mean.ts <- K.meas.mean / (86400/interval) K.meas.sd.ts <- K.meas.sd / (86400/interval) - data.list <- list("num.measurements","interval","tempC","DO.meas","PAR","salinity","atmo.pressure", "K.init", - "K.est.n", "K.meas.mean.ts", "K.meas.sd.ts", "p.est.n", "theta.est.n") - + data.list <- list("num.measurements","interval","tempC","DO.meas","PAR","salinity","atmo.pressure", "K.init", + "K.est.n", "K.meas.mean.ts", "K.meas.sd.ts", "p.est.n", "theta.est.n") + # Define monitoring variables params=c("A","R","K","K.day","p","theta","tau","ER","GPP", "NEP","sum.obs.resid","sum.ppa.resid","PPfit","DO.modelled") - + ## Call jags ## - - # Set debug = T below to inspect each file for model convergence + + # Set debug = T below to inspect each file for model convergence # (inspect the main parameters for convergence using bgr diagrams, history, density and autocorrelation) # n.iter=1000; n.burnin=500 metabfit=NULL - metabfit <- do.call(R2jags::jags.parallel, - list(data=data.list, inits=inits, parameters.to.save=params, model.file = file.path(system.file(package="BASEmetab"), "BASE_metab_model_v2.3.txt"), + metabfit <- do.call(jagsUI::jags, + list(data=data.list, inits=inits, parameters.to.save=params, + model.file = file.path(system.file(package="BASEmetab"), "BASE_metab_model_v2.3.txt"), n.chains = n.chains, n.iter = n.iter, n.burnin = n.burnin, - n.thin = n.thin, n.cluster= n.chains, DIC = TRUE, - jags.seed = 123, digits=5)) - + n.thin = n.thin, DIC = TRUE, parallel = TRUE, + seed = 123, bugs.format=TRUE)) + # print(metabfit, digits=2) # to inspect results of last metabfit - + ## diagnostic summaries # Rhat (srf) test - srf<- metabfit$BUGSoutput$summary[,8] + srf<- metabfit$summary[,8] Rhat.test <- NULL Rhat.test <- ifelse(any(srf>1.1, na.rm=T)==TRUE,"Check convergence", "Fine") - + # Check for convergence and update once if requested if(update.chains == TRUE) { if(Rhat.test == "Check convergence") { - recompile(metabfit) - metabfit <- update(metabfit, n.iter=n.iter*extra.iter) - + #recompile(metabfit) + message(paste("Model not converged in", n.iter, "iterations, updating:")) + metabfit <- update(metabfit, n.iter=n.iter*extra.iter) + # Rhat (srf) test - second round in case metabfit is updated - srf<- metabfit$BUGSoutput$summary[,8] + srf<- metabfit$summary[,8] Rhat.test <- NULL Rhat.test <- ifelse(any(srf>1.1, na.rm=T)==TRUE,"Check convergence", "Fine") } } - + # autocorr test - metabfit.mcmc<-coda::as.mcmc(metabfit) - ac.lag1 <- autocorr.diag(metabfit.mcmc, lags = 1) + metabfit.mcmc<- metabfit$samples + ac.lag1 <- coda::autocorr.diag(metabfit.mcmc, lags = 1) auto.corr.test <- NULL auto.corr.test <- ifelse(any(abs(ac.lag1)>0.1, na.rm=T)==TRUE,"Check ac", "ac OK") - - PPP <- metabfit$BUGSoutput$summary["PPfit","mean"] # posterior predictive p-value - - DO.mod.means <- metabfit$BUGSoutput$mean$DO.modelled - DO.mod.sd <- metabfit$BUGSoutput$sd$DO.modelled - + + PPP <- metabfit$summary["PPfit","mean"] # posterior predictive p-value + + DO.mod.means <- metabfit$mean$DO.modelled + DO.mod.sd <- metabfit$sd$DO.modelled + R2 = cor(DO.mod.means,DO.meas)^2 - rmse = sqrt(sum((metabfit$BUGSoutput$mean$DO.modelled-DO.meas)^2)/length(DO.meas)) - post.mean.dev <- metabfit$BUGSoutput$mean$deviance - pD <- metabfit$BUGSoutput$pD - DIC <- metabfit$BUGSoutput$DIC - + rmse = sqrt(sum((metabfit$mean$DO.modelled-DO.meas)^2)/length(DO.meas)) + post.mean.dev <- metabfit$mean$deviance + pD <- metabfit$pD + DIC <- metabfit$DIC + DO.lag<-DO.meas[2:length(DO.meas)]-DO.meas[1:(length(DO.meas)-1)] ptpvar <- sqrt((sum((DO.lag)^2)/(length(DO.meas)-1))) # point to point variation rmse.relative <- rmse / ptpvar - - diff<-metabfit$BUGSoutput$mean$DO.modelled-DO.meas + + diff<-metabfit$mean$DO.modelled-DO.meas mrl.max<-max(rle(sign(as.vector(diff)))$lengths) mrl.fraction<-max(rle(sign(as.vector(diff)))$lengths)/length(DO.meas) # proportion of largest run - - ER.K.cor <- cor(metabfit$BUGSoutput$sims.list$ER,metabfit$BUGSoutput$sims.list$K) # plot(metabfit$sims.list$ER ~ metabfit$sims.list$K) - + + ER.K.cor <- cor(metabfit$sims.list$ER,metabfit$sims.list$K) # plot(metabfit$sims.list$ER ~ metabfit$sims.list$K) + # insert results to table and write table - result <- data.frame(File=as.character(fname), Date=as.character(d), metabfit$BUGSoutput$mean$GPP, metabfit$BUGSoutput$sd$GPP, metabfit$BUGSoutput$mean$ER, metabfit$BUGSoutput$sd$ER, metabfit$BUGSoutput$mean$K.day, - metabfit$BUGSoutput$sd$K.day, metabfit$BUGSoutput$mean$theta, metabfit$BUGSoutput$sd$theta, metabfit$BUGSoutput$mean$A, metabfit$BUGSoutput$sd$A, metabfit$BUGSoutput$mean$p, metabfit$BUGSoutput$sd$p, - R2, PPP, rmse, rmse.relative, mrl.fraction, ER.K.cor, Rhat.test, metabfit$BUGSoutput$summary["A",8] , metabfit$BUGSoutput$summary["K",8], - metabfit$BUGSoutput$summary["theta",8], metabfit$BUGSoutput$summary["p",8], metabfit$BUGSoutput$summary["R",8], metabfit$BUGSoutput$summary["GPP",8], DIC, pD, + result <- data.frame(File=as.character(fname), Date=as.character(d), metabfit$mean$GPP, metabfit$sd$GPP, metabfit$mean$ER, metabfit$sd$ER, metabfit$mean$K.day, + metabfit$sd$K.day, metabfit$mean$theta, metabfit$sd$theta, metabfit$mean$A, metabfit$sd$A, metabfit$mean$p, metabfit$sd$p, + R2, PPP, rmse, rmse.relative, mrl.fraction, ER.K.cor, Rhat.test, metabfit$summary["A",8] , metabfit$summary["K",8], + metabfit$summary["theta",8], metabfit$summary["p",8], metabfit$summary["R",8], metabfit$summary["GPP",8], DIC, pD, stringsAsFactors = FALSE) output.table[nrow(output.table)+1,] <- result write.csv(output.table, file=file.path(results.dir, "BASE_results.csv")) # output file overwritten at each iteration - + # insert results to instantaneous table and write if(instant == TRUE) { instant.result <- data.frame(File=as.character(rep(fname,seconds/interval)), Date=as.character(rep(d,seconds/interval)),interval=1:(seconds/interval), - tempC=tempC, I=PAR, - K.instant=as.vector(metabfit$BUGSoutput$mean$K) * 1.0241^(tempC-mean(tempC)), - GPP.instant=as.vector(metabfit$BUGSoutput$mean$A) * PAR^as.vector(metabfit$BUGSoutput$mean$p), - ER.instant=as.vector(metabfit$BUGSoutput$mean$R) * as.vector(metabfit$BUGSoutput$mean$theta)^(tempC-mean(tempC)), + tempC=tempC, I=PAR, + K.instant=as.vector(metabfit$mean$K) * 1.0241^(tempC-mean(tempC)), + GPP.instant=as.vector(metabfit$mean$A) * PAR^as.vector(metabfit$mean$p), + ER.instant=as.vector(metabfit$mean$R) * as.vector(metabfit$mean$theta)^(tempC-mean(tempC)), stringsAsFactors = FALSE) instant.rates[(nrow(instant.rates)+1):(nrow(instant.rates)+(seconds/interval)),] <- instant.result write.csv(instant.rates, file=file.path(results.dir, "instantaneous_rates.csv")) # output file name } - + # diagnostic multi-panel plot jpeg(file=file.path(results.dir, paste0(substr(fname, 1,(nchar(fname)-4)),"_", as.character(d), ".jpg")), width=1200, height=1200, pointsize=30, quality=300) - + par(mfrow=c(3,3), mar=c(3,4,2,1), oma=c(0.1,0.1,0.1,0.1)) - traceplot(metabfit, varname=c('A','p','R','K.day','theta'), ask=FALSE, mfrow=c(3,3), new=FALSE) - + jagsUI::traceplot(metabfit.mcmc[, c('A','p','R','K.day','theta')], ask=FALSE, mfrow=c(3,3), new=FALSE) + plot(1:num.measurements,data.sub$DO.meas, type="p",pch=21, col="grey60",cex=0.8, ylim=c(min(DO.mod.means-DO.mod.sd)-0.5,max(DO.mod.means+DO.mod.sd)+0.5), xlab="Timestep", ylab="DO mg/L") if(smooth.DO > 0) { points(1:num.measurements,data.sub$DO.smooth,type='l',lwd=2,xlab="Timestep", col="red", cex=0.75) } - points(1:num.measurements,DO.mod.means,lwd=1.5, type="l", xlab="Timestep", col="black") + points(1:num.measurements,DO.mod.means,lwd=1.5, type="l", xlab="Timestep", col="black") points(1:num.measurements,DO.mod.means+DO.mod.sd, type="l", lty=2) points(1:num.measurements,DO.mod.means-DO.mod.sd, type="l", lty=2) legend(x="topleft", legend=c("DO meas", "DO smooth", "DO fit"), pch=c(1,NA,NA), lty=c(NA,1,1), col=c("grey60","red", "black"), cex=0.75, bty='n') - + plot(1:num.measurements,tempC,pch=1,xlab="Timestep" , typ='p', col="grey60") legend(x="topleft", legend=c("TempC meas"), pch=c(1), col=c("grey60"), cex=0.75, bty='n') - + plot(1:num.measurements,data.sub$I,pch=1,xlab="Timestep" , typ='p', col="grey60", ylab='PAR') if(smooth.PAR==TRUE) { points(1:num.measurements,data.sub$I.smooth,type='l',lwd=2,xlab="Timestep", col="red", cex=0.75) } legend(x="topleft", legend=c("PAR meas", "PAR smooth"), pch=c(1,NA), lty=c(NA,1), col=c("grey60","red"), cex=0.75, bty='n') - + graphics.off() - + } } - + end.time<-NULL; end.time<-Sys.time() print(end.time-start.time) return(output.table) - -} - +} diff --git a/basemetab.Rproj b/basemetab.Rproj index 497f8bf..270314b 100644 --- a/basemetab.Rproj +++ b/basemetab.Rproj @@ -18,3 +18,4 @@ StripTrailingWhitespace: Yes BuildType: Package PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,collate,namespace