Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by roxygen2: do not edit by hand

export(bayesmetab)
import(R2jags)
import(jagsUI)
190 changes: 93 additions & 97 deletions R/bayesmetab.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,11 @@
#'
#' @return A dataframe and csv file of parameter estimates (mean, SD) and checks of model fit, plots of model fit (see Vignette for details https://github.com/dgiling/BASEmetab/blob/master/vignettes/BASEmetab.pdf).
#'
#'@references Grace et al. (2015) Fast processing of diel oxygen curves: estimating stream metabolism with BASE (BAyesian Single-station Estimation). Limnology and Oceanography: Methods, 13, 103-114.
#' @references Grace et al. (2015) Fast processing of diel oxygen curves: estimating stream metabolism with BASE (BAyesian Single-station Estimation). Limnology and Oceanography: Methods, 13, 103-114.
#'
#' @author Darren Giling, Ralph Mac Nally
#' @examples
#'
#' ##Link to JAGS
#' library(R2jags)
#'
#' ##View example data set.
#' #set path to example data.
#' data.dir <- system.file("extdata", package = "BASEmetab")
Expand All @@ -47,52 +44,51 @@
#' results <- bayesmetab(data.dir, results.dir, interval=600)
#'
#' @export
#' @import R2jags
#' @import jagsUI

bayesmetab <- function(data.dir, results.dir, interval, n.iter=20000, n.burnin=n.iter*0.5, K.init = 2,
smooth.DO=0, smooth.PAR=FALSE, instant=FALSE, update.chains = TRUE, extra.iter=1,
K.est = TRUE, K.meas.mean = 0, K.meas.sd = 4, p.est=FALSE, theta.est=FALSE)
{
bayesmetab <- function(data.dir, results.dir, interval, n.iter=20000, n.burnin=n.iter*0.5, K.init = 2,
smooth.DO=0, smooth.PAR=FALSE, instant=FALSE, update.chains = TRUE, extra.iter=1,
K.est = TRUE, K.meas.mean = 0, K.meas.sd = 4, p.est=FALSE, theta.est=FALSE) {
start.time<-NULL; start.time<-Sys.time()

# model file
model.dir <- system.file("tools", package = "BASEmetab")

# define functions
smooth5 <- function(x) (zoo::rollapply(x, 5, mean, na.rm=T,align="center")) # moving average of 5 time intervals

# data input and set up output table dataframes
filenames<-list.files(file.path(data.dir))
filenames<-list.files(file.path(data.dir))

# Set up output tables
output.table<-NULL
output.table<-data.frame(File=character(), Date=character(), GPP.mean=double(), GPP.sd=double(), ER.mean=double(), ER.sd=double(),
K.mean=double(), K.sd=double(), theta.mean=double(), theta.sd=double(), A.mean=double(), A.sd=double(), p.mean=double(), p.sd=double(),
output.table<-data.frame(File=character(), Date=character(), GPP.mean=double(), GPP.sd=double(), ER.mean=double(), ER.sd=double(),
K.mean=double(), K.sd=double(), theta.mean=double(), theta.sd=double(), A.mean=double(), A.sd=double(), p.mean=double(), p.sd=double(),
R2=double(), PPP=double(), rmse=double(), rmse.relative=double(), mrl.fraction=double(), ER.K.cor=double(), convergence.check=double(), A.Rhat=double(),
K.Rhat=double(), theta.Rhat=double(), p.Rhat=double(), R.Rhat=double(), GPP.Rhat=double(), DIC=double(), pD=double(),
K.Rhat=double(), theta.Rhat=double(), p.Rhat=double(), R.Rhat=double(), GPP.Rhat=double(), DIC=double(), pD=double(),
stringsAsFactors=FALSE)
instant.rates<-data.frame(File=character(), Date=character(), interval=integer(),
instant.rates<-data.frame(File=character(), Date=character(), interval=integer(),
tempC=double(), I=double(), K.instant=double(), GPP.instant=double(), ER.instant=double(),
stringsAsFactors=FALSE)

# Analyse files sequentially
for (fname in filenames) {
#fname <- filenames[1]

data<-read.csv(file.path(data.dir,fname), head=T) # read next file
seconds<-86400
req.rows <- 86400/interval
N = nrow(data)
x = 0:(N-1)

## checks
# check headers
if (! all(colnames(data) %in% c("Date","Time","I", "tempC", "DO.meas", "atmo.pressure", "salinity"))) {
stop(paste0("Column headers in input file '", fname, "' do not include 'Date', 'Time', 'I', 'tempC', 'DO.meas', 'atmo.pressure' and 'salinity' (they are case sensitive)")) }
# check dates for "/" and replace with "-"
data$Date <- gsub("/", "-", data$Date)
# check headers
if (! all(colnames(data) %in% c("Date","Time","I", "tempC", "DO.meas", "atmo.pressure", "salinity", "note"))) {
stop(paste0("Column headers in input file '", fname, "' do not include 'Date', 'Time', 'I', 'tempC', 'DO.meas', 'atmo.pressure', 'salinity' and 'note' (they are case sensitive)")) }

# check dates for "/" and replace with "-"
data$Date <- gsub("/", "-", data$Date)

## Smoothing data
if(smooth.DO > 0) {
# fast Fourier transform smoothing - low pass filter
Expand All @@ -104,11 +100,11 @@ bayesmetab <- function(data.dir, results.dir, interval, n.iter=20000, n.burnin=n
data$DO.smooth <- Re( fft( DO.fft_filtered, inverse=TRUE) / N )
noise <- data$DO.meas - data$DO.smooth
}

if(smooth.PAR == T) {
data$I.smooth<-c(data$I[1:2],smooth5(data$I),data$I[nrow(data)-1],data$I[nrow(data)]) # moving average over 5 time intervals
}

# Select dates
data$Date <- factor(data$Date, levels = unique(data$Date))
dates <- unique(data$Date)
Expand All @@ -118,154 +114,154 @@ bayesmetab <- function(data.dir, results.dir, interval, n.iter=20000, n.burnin=n
warning(paste0("Not all dates in file '", fname, "' contain ", req.rows, " rows"))
}
dates <- dates[n.records == (seconds/interval)] # select only dates with full days

## Analyse days sequentially
for (d in dates)
{
for (d in dates)
{
#d <- dates[2]
data.sub <- data[data$Date == d,]

# Define data vectors
num.measurements <- nrow(data.sub)
tempC <- data.sub$tempC
salinity <-data.sub$salinity
atmo.pressure <- data.sub$atmo.pressure
DO.meas <- if(smooth.DO > 0) data.sub$DO.smooth else data.sub$DO.meas
PAR <- if(smooth.PAR == TRUE) data.sub$I.smooth else data.sub$I

# Initial values
# Set these to something sensible if the model is becoming stuck in a bad parameter space
# These values here are expressed per timestep, not per day. Divide desired initial K (/day) by the number of timesteps in a day, as shown in default below
# These values here are expressed per timestep, not per day. Divide desired initial K (/day) by the number of timesteps in a day, as shown in default below
inits <- function() { list(K = K.init / (86400/interval) ) }

# Different random seeds
kern=as.integer(runif(1000,min=1,max=10000))
iters=sample(kern,1)
# Set

# Set
n.chains <- 3
n.thin <- 10
p.est.n <- as.numeric(p.est)
theta.est.n <- as.numeric(theta.est)
K.est.n <- as.numeric(K.est)
K.meas.mean.ts <- K.meas.mean / (86400/interval)
K.meas.sd.ts <- K.meas.sd / (86400/interval)
data.list <- list("num.measurements","interval","tempC","DO.meas","PAR","salinity","atmo.pressure", "K.init",
"K.est.n", "K.meas.mean.ts", "K.meas.sd.ts", "p.est.n", "theta.est.n")
data.list <- list("num.measurements","interval","tempC","DO.meas","PAR","salinity","atmo.pressure", "K.init",
"K.est.n", "K.meas.mean.ts", "K.meas.sd.ts", "p.est.n", "theta.est.n")

# Define monitoring variables
params=c("A","R","K","K.day","p","theta","tau","ER","GPP", "NEP","sum.obs.resid","sum.ppa.resid","PPfit","DO.modelled")

## Call jags ##
# Set debug = T below to inspect each file for model convergence

# Set debug = T below to inspect each file for model convergence
# (inspect the main parameters for convergence using bgr diagrams, history, density and autocorrelation)
# n.iter=1000; n.burnin=500
metabfit=NULL
metabfit <- do.call(R2jags::jags.parallel,
list(data=data.list, inits=inits, parameters.to.save=params, model.file = file.path(system.file(package="BASEmetab"), "BASE_metab_model_v2.3.txt"),
metabfit <- do.call(jagsUI::jags,
list(data=data.list, inits=inits, parameters.to.save=params,
model.file = file.path(system.file(package="BASEmetab"), "BASE_metab_model_v2.3.txt"),
n.chains = n.chains, n.iter = n.iter, n.burnin = n.burnin,
n.thin = n.thin, n.cluster= n.chains, DIC = TRUE,
jags.seed = 123, digits=5))
n.thin = n.thin, DIC = TRUE, parallel = TRUE,
seed = 123, bugs.format=TRUE))

# print(metabfit, digits=2) # to inspect results of last metabfit

## diagnostic summaries
# Rhat (srf) test
srf<- metabfit$BUGSoutput$summary[,8]
srf<- metabfit$summary[,8]
Rhat.test <- NULL
Rhat.test <- ifelse(any(srf>1.1, na.rm=T)==TRUE,"Check convergence", "Fine")

# Check for convergence and update once if requested
if(update.chains == TRUE) {
if(Rhat.test == "Check convergence") {
recompile(metabfit)
metabfit <- update(metabfit, n.iter=n.iter*extra.iter)

#recompile(metabfit)
message(paste("Model not converged in", n.iter, "iterations, updating:"))
metabfit <- update(metabfit, n.iter=n.iter*extra.iter)

# Rhat (srf) test - second round in case metabfit is updated
srf<- metabfit$BUGSoutput$summary[,8]
srf<- metabfit$summary[,8]
Rhat.test <- NULL
Rhat.test <- ifelse(any(srf>1.1, na.rm=T)==TRUE,"Check convergence", "Fine")
}
}

# autocorr test
metabfit.mcmc<-coda::as.mcmc(metabfit)
ac.lag1 <- autocorr.diag(metabfit.mcmc, lags = 1)
metabfit.mcmc<- metabfit$samples
ac.lag1 <- coda::autocorr.diag(metabfit.mcmc, lags = 1)
auto.corr.test <- NULL
auto.corr.test <- ifelse(any(abs(ac.lag1)>0.1, na.rm=T)==TRUE,"Check ac", "ac OK")
PPP <- metabfit$BUGSoutput$summary["PPfit","mean"] # posterior predictive p-value
DO.mod.means <- metabfit$BUGSoutput$mean$DO.modelled
DO.mod.sd <- metabfit$BUGSoutput$sd$DO.modelled

PPP <- metabfit$summary["PPfit","mean"] # posterior predictive p-value

DO.mod.means <- metabfit$mean$DO.modelled
DO.mod.sd <- metabfit$sd$DO.modelled

R2 = cor(DO.mod.means,DO.meas)^2
rmse = sqrt(sum((metabfit$BUGSoutput$mean$DO.modelled-DO.meas)^2)/length(DO.meas))
post.mean.dev <- metabfit$BUGSoutput$mean$deviance
pD <- metabfit$BUGSoutput$pD
DIC <- metabfit$BUGSoutput$DIC
rmse = sqrt(sum((metabfit$mean$DO.modelled-DO.meas)^2)/length(DO.meas))
post.mean.dev <- metabfit$mean$deviance
pD <- metabfit$pD
DIC <- metabfit$DIC

DO.lag<-DO.meas[2:length(DO.meas)]-DO.meas[1:(length(DO.meas)-1)]
ptpvar <- sqrt((sum((DO.lag)^2)/(length(DO.meas)-1))) # point to point variation
rmse.relative <- rmse / ptpvar
diff<-metabfit$BUGSoutput$mean$DO.modelled-DO.meas

diff<-metabfit$mean$DO.modelled-DO.meas
mrl.max<-max(rle(sign(as.vector(diff)))$lengths)
mrl.fraction<-max(rle(sign(as.vector(diff)))$lengths)/length(DO.meas) # proportion of largest run
ER.K.cor <- cor(metabfit$BUGSoutput$sims.list$ER,metabfit$BUGSoutput$sims.list$K) # plot(metabfit$sims.list$ER ~ metabfit$sims.list$K)

ER.K.cor <- cor(metabfit$sims.list$ER,metabfit$sims.list$K) # plot(metabfit$sims.list$ER ~ metabfit$sims.list$K)

# insert results to table and write table
result <- data.frame(File=as.character(fname), Date=as.character(d), metabfit$BUGSoutput$mean$GPP, metabfit$BUGSoutput$sd$GPP, metabfit$BUGSoutput$mean$ER, metabfit$BUGSoutput$sd$ER, metabfit$BUGSoutput$mean$K.day,
metabfit$BUGSoutput$sd$K.day, metabfit$BUGSoutput$mean$theta, metabfit$BUGSoutput$sd$theta, metabfit$BUGSoutput$mean$A, metabfit$BUGSoutput$sd$A, metabfit$BUGSoutput$mean$p, metabfit$BUGSoutput$sd$p,
R2, PPP, rmse, rmse.relative, mrl.fraction, ER.K.cor, Rhat.test, metabfit$BUGSoutput$summary["A",8] , metabfit$BUGSoutput$summary["K",8],
metabfit$BUGSoutput$summary["theta",8], metabfit$BUGSoutput$summary["p",8], metabfit$BUGSoutput$summary["R",8], metabfit$BUGSoutput$summary["GPP",8], DIC, pD,
result <- data.frame(File=as.character(fname), Date=as.character(d), metabfit$mean$GPP, metabfit$sd$GPP, metabfit$mean$ER, metabfit$sd$ER, metabfit$mean$K.day,
metabfit$sd$K.day, metabfit$mean$theta, metabfit$sd$theta, metabfit$mean$A, metabfit$sd$A, metabfit$mean$p, metabfit$sd$p,
R2, PPP, rmse, rmse.relative, mrl.fraction, ER.K.cor, Rhat.test, metabfit$summary["A",8] , metabfit$summary["K",8],
metabfit$summary["theta",8], metabfit$summary["p",8], metabfit$summary["R",8], metabfit$summary["GPP",8], DIC, pD,
stringsAsFactors = FALSE)
output.table[nrow(output.table)+1,] <- result
write.csv(output.table, file=file.path(results.dir, "BASE_results.csv")) # output file overwritten at each iteration

# insert results to instantaneous table and write
if(instant == TRUE) {
instant.result <- data.frame(File=as.character(rep(fname,seconds/interval)), Date=as.character(rep(d,seconds/interval)),interval=1:(seconds/interval),
tempC=tempC, I=PAR,
K.instant=as.vector(metabfit$BUGSoutput$mean$K) * 1.0241^(tempC-mean(tempC)),
GPP.instant=as.vector(metabfit$BUGSoutput$mean$A) * PAR^as.vector(metabfit$BUGSoutput$mean$p),
ER.instant=as.vector(metabfit$BUGSoutput$mean$R) * as.vector(metabfit$BUGSoutput$mean$theta)^(tempC-mean(tempC)),
tempC=tempC, I=PAR,
K.instant=as.vector(metabfit$mean$K) * 1.0241^(tempC-mean(tempC)),
GPP.instant=as.vector(metabfit$mean$A) * PAR^as.vector(metabfit$mean$p),
ER.instant=as.vector(metabfit$mean$R) * as.vector(metabfit$mean$theta)^(tempC-mean(tempC)),
stringsAsFactors = FALSE)
instant.rates[(nrow(instant.rates)+1):(nrow(instant.rates)+(seconds/interval)),] <- instant.result
write.csv(instant.rates, file=file.path(results.dir, "instantaneous_rates.csv")) # output file name
}

# diagnostic multi-panel plot
jpeg(file=file.path(results.dir, paste0(substr(fname, 1,(nchar(fname)-4)),"_", as.character(d), ".jpg")), width=1200, height=1200, pointsize=30, quality=300)

par(mfrow=c(3,3), mar=c(3,4,2,1), oma=c(0.1,0.1,0.1,0.1))
traceplot(metabfit, varname=c('A','p','R','K.day','theta'), ask=FALSE, mfrow=c(3,3), new=FALSE)
jagsUI::traceplot(metabfit.mcmc[, c('A','p','R','K.day','theta')], ask=FALSE, mfrow=c(3,3), new=FALSE)

plot(1:num.measurements,data.sub$DO.meas, type="p",pch=21, col="grey60",cex=0.8, ylim=c(min(DO.mod.means-DO.mod.sd)-0.5,max(DO.mod.means+DO.mod.sd)+0.5), xlab="Timestep", ylab="DO mg/L")
if(smooth.DO > 0) { points(1:num.measurements,data.sub$DO.smooth,type='l',lwd=2,xlab="Timestep", col="red", cex=0.75) }
points(1:num.measurements,DO.mod.means,lwd=1.5, type="l", xlab="Timestep", col="black")
points(1:num.measurements,DO.mod.means,lwd=1.5, type="l", xlab="Timestep", col="black")
points(1:num.measurements,DO.mod.means+DO.mod.sd, type="l", lty=2)
points(1:num.measurements,DO.mod.means-DO.mod.sd, type="l", lty=2)
legend(x="topleft", legend=c("DO meas", "DO smooth", "DO fit"), pch=c(1,NA,NA), lty=c(NA,1,1), col=c("grey60","red", "black"), cex=0.75, bty='n')

plot(1:num.measurements,tempC,pch=1,xlab="Timestep" , typ='p', col="grey60")
legend(x="topleft", legend=c("TempC meas"), pch=c(1), col=c("grey60"), cex=0.75, bty='n')

plot(1:num.measurements,data.sub$I,pch=1,xlab="Timestep" , typ='p', col="grey60", ylab='PAR')
if(smooth.PAR==TRUE) { points(1:num.measurements,data.sub$I.smooth,type='l',lwd=2,xlab="Timestep", col="red", cex=0.75) }
legend(x="topleft", legend=c("PAR meas", "PAR smooth"), pch=c(1,NA), lty=c(NA,1), col=c("grey60","red"), cex=0.75, bty='n')

graphics.off()

}
}

end.time<-NULL; end.time<-Sys.time()
print(end.time-start.time)
return(output.table)

}


}
1 change: 1 addition & 0 deletions basemetab.Rproj
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@ StripTrailingWhitespace: Yes
BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace