diff --git a/vmstools/DESCRIPTION b/vmstools/DESCRIPTION index 5e2c8ec..d03898b 100644 --- a/vmstools/DESCRIPTION +++ b/vmstools/DESCRIPTION @@ -11,3 +11,4 @@ Imports: cluster, data.table, doBy, maps, mapdata, maptools, PBSmapping, sp, seg Suggests: lattice, raster, shapefiles, tcltk Repository: CRAN LazyLoad: Yes +Roxygen: list(markdown = TRUE) diff --git a/vmstools/R/CSquare.r b/vmstools/R/CSquare.r index 6c77455..a6463d8 100644 --- a/vmstools/R/CSquare.r +++ b/vmstools/R/CSquare.r @@ -1,46 +1,70 @@ -CSquare <- function(lon,lat,degrees){ - - if(length(lon) != length(lat)) stop("length of longitude not equal to length of latitude") - if(!degrees %in% c(10,5,1,0.5,0.1,0.05,0.01)) stop("degrees specified not in range: c(10,5,1,0.5,0.1,0.05,0.01)") - - dims <- length(lon) - - quadrants <- array(NA,dim=c(4,6,dims),dimnames=list(c("globalQuadrant","intmQuadrant1","intmQuadrant2","intmQuadrant3"),c("quadrantDigit","latDigit","lonDigit","latRemain","lonRemain","code"),seq(1,dims,1))) - - quadrants["globalQuadrant","quadrantDigit",] <- 4-(((2*floor(1+(lon/200)))-1)*((2*floor(1+(lat/200)))+1)) - quadrants["globalQuadrant","latDigit",] <- floor(abs(lat)/10) - quadrants["globalQuadrant","lonDigit",] <- floor(abs(lon)/10) - quadrants["globalQuadrant","latRemain",] <- round(abs(lat)-(quadrants["globalQuadrant","latDigit",]*10),7) - quadrants["globalQuadrant","lonRemain",] <- round(abs(lon)-(quadrants["globalQuadrant","lonDigit",]*10),7) - quadrants["globalQuadrant","code",] <- quadrants["globalQuadrant","quadrantDigit",]*1000+quadrants["globalQuadrant","latDigit",]*100+quadrants["globalQuadrant","lonDigit",] - - quadrants["intmQuadrant1","quadrantDigit",] <- (2*floor(quadrants["globalQuadrant","latRemain",]*0.2))+floor(quadrants["globalQuadrant","lonRemain",]*0.2)+1 - quadrants["intmQuadrant1","latDigit",] <- floor(quadrants["globalQuadrant","latRemain",]) - quadrants["intmQuadrant1","lonDigit",] <- floor(quadrants["globalQuadrant","lonRemain",]) - 
quadrants["intmQuadrant1","latRemain",] <- round((quadrants["globalQuadrant","latRemain",]-quadrants["intmQuadrant1","latDigit",])*10,7) - quadrants["intmQuadrant1","lonRemain",] <- round((quadrants["globalQuadrant","lonRemain",]-quadrants["intmQuadrant1","lonDigit",])*10,7) - quadrants["intmQuadrant1","code",] <- quadrants["intmQuadrant1","quadrantDigit",]*100+quadrants["intmQuadrant1","latDigit",]*10+quadrants["intmQuadrant1","lonDigit",] - - quadrants["intmQuadrant2","quadrantDigit",] <- (2*floor(quadrants["intmQuadrant1","latRemain",]*0.2))+floor(quadrants["intmQuadrant1","lonRemain",]*0.2)+1 - quadrants["intmQuadrant2","latDigit",] <- floor(quadrants["intmQuadrant1","latRemain",]) - quadrants["intmQuadrant2","lonDigit",] <- floor(quadrants["intmQuadrant1","lonRemain",]) - quadrants["intmQuadrant2","latRemain",] <- round((quadrants["intmQuadrant1","latRemain",]-quadrants["intmQuadrant2","latDigit",])*10,7) - quadrants["intmQuadrant2","lonRemain",] <- round((quadrants["intmQuadrant1","lonRemain",]-quadrants["intmQuadrant2","lonDigit",])*10,7) - quadrants["intmQuadrant2","code",] <- quadrants["intmQuadrant2","quadrantDigit",]*100+quadrants["intmQuadrant2","latDigit",]*10+quadrants["intmQuadrant2","lonDigit",] - - quadrants["intmQuadrant3","quadrantDigit",] <- (2*floor(quadrants["intmQuadrant2","latRemain",]*0.2))+floor(quadrants["intmQuadrant2","lonRemain",]*0.2)+1 - quadrants["intmQuadrant3","latDigit",] <- floor(quadrants["intmQuadrant2","latRemain",]) - quadrants["intmQuadrant3","lonDigit",] <- floor(quadrants["intmQuadrant2","lonRemain",]) - quadrants["intmQuadrant3","latRemain",] <- round((quadrants["intmQuadrant2","latRemain",]-quadrants["intmQuadrant3","latDigit",])*10,7) - quadrants["intmQuadrant3","lonRemain",] <- round((quadrants["intmQuadrant2","lonRemain",]-quadrants["intmQuadrant3","lonDigit",])*10,7) - quadrants["intmQuadrant3","code",] <- 
quadrants["intmQuadrant3","quadrantDigit",]*100+quadrants["intmQuadrant3","latDigit",]*10+quadrants["intmQuadrant3","lonDigit",] - - if(degrees == 10) CSquareCodes <- quadrants["globalQuadrant","code",] - if(degrees == 5) CSquareCodes <- paste(quadrants["globalQuadrant","code",],":",quadrants["intmQuadrant1","quadrantDigit",],sep="") - if(degrees == 1) CSquareCodes <- paste(quadrants["globalQuadrant","code",],":",quadrants["intmQuadrant1","code",],sep="") - if(degrees == 0.5) CSquareCodes <- paste(quadrants["globalQuadrant","code",],":",quadrants["intmQuadrant1","code",],":",quadrants["intmQuadrant2","quadrantDigit",],sep="") - if(degrees == 0.1) CSquareCodes <- paste(quadrants["globalQuadrant","code",],":",quadrants["intmQuadrant1","code",],":",quadrants["intmQuadrant2","code",],sep="") - if(degrees == 0.05) CSquareCodes <- paste(quadrants["globalQuadrant","code",],":",quadrants["intmQuadrant1","code",],":",quadrants["intmQuadrant2","code",],":",quadrants["intmQuadrant3","quadrantDigit",],sep="") - if(degrees == 0.01) CSquareCodes <- paste(quadrants["globalQuadrant","code",],":",quadrants["intmQuadrant1","code",],":",quadrants["intmQuadrant2","code",],":",quadrants["intmQuadrant3","code",],sep="") - -return(CSquareCodes)} \ No newline at end of file +#' Calculate the CSquare notation from the GPS locations +#' +#' Compute the CSquare notation from the GPS location where you can define what +#' resolution you want the CSquare notation to be. +#' +#' +#' @param lon Longitudes of points +#' @param lat Latitudes of points +#' @param degrees Resolution of CSquare notation: 10, 5, 1, 0.5, 0.1, 0.05, +#' 0.01 +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{km2Degree}}, \code{\link{degree2Km}}, +#' \code{\link{lonLatRatio}}, \code{\link{distance}} +#' @references EU Lot 2 project, based on CSquare: +#' http://www.marine.csiro.au/csquares/ +#' @examples +#' +#' lon <- -4 +#' lat <- 50 +#' degrees <- 0.01 +#' +#' CSquare(lon,lat,degrees) # "7500:104:100:100" +#' +#' @export CSquare +CSquare <- function(lon,lat,degrees){ + + if(length(lon) != length(lat)) stop("length of longitude not equal to length of latitude") + if(!degrees %in% c(10,5,1,0.5,0.1,0.05,0.01)) stop("degrees specified not in range: c(10,5,1,0.5,0.1,0.05,0.01)") + + dims <- length(lon) + + quadrants <- array(NA,dim=c(4,6,dims),dimnames=list(c("globalQuadrant","intmQuadrant1","intmQuadrant2","intmQuadrant3"),c("quadrantDigit","latDigit","lonDigit","latRemain","lonRemain","code"),seq(1,dims,1))) + + quadrants["globalQuadrant","quadrantDigit",] <- 4-(((2*floor(1+(lon/200)))-1)*((2*floor(1+(lat/200)))+1)) + quadrants["globalQuadrant","latDigit",] <- floor(abs(lat)/10) + quadrants["globalQuadrant","lonDigit",] <- floor(abs(lon)/10) + quadrants["globalQuadrant","latRemain",] <- round(abs(lat)-(quadrants["globalQuadrant","latDigit",]*10),7) + quadrants["globalQuadrant","lonRemain",] <- round(abs(lon)-(quadrants["globalQuadrant","lonDigit",]*10),7) + quadrants["globalQuadrant","code",] <- quadrants["globalQuadrant","quadrantDigit",]*1000+quadrants["globalQuadrant","latDigit",]*100+quadrants["globalQuadrant","lonDigit",] + + quadrants["intmQuadrant1","quadrantDigit",] <- (2*floor(quadrants["globalQuadrant","latRemain",]*0.2))+floor(quadrants["globalQuadrant","lonRemain",]*0.2)+1 + quadrants["intmQuadrant1","latDigit",] <- floor(quadrants["globalQuadrant","latRemain",]) + quadrants["intmQuadrant1","lonDigit",] <- floor(quadrants["globalQuadrant","lonRemain",]) + quadrants["intmQuadrant1","latRemain",] <- round((quadrants["globalQuadrant","latRemain",]-quadrants["intmQuadrant1","latDigit",])*10,7) + 
quadrants["intmQuadrant1","lonRemain",] <- round((quadrants["globalQuadrant","lonRemain",]-quadrants["intmQuadrant1","lonDigit",])*10,7) + quadrants["intmQuadrant1","code",] <- quadrants["intmQuadrant1","quadrantDigit",]*100+quadrants["intmQuadrant1","latDigit",]*10+quadrants["intmQuadrant1","lonDigit",] + + quadrants["intmQuadrant2","quadrantDigit",] <- (2*floor(quadrants["intmQuadrant1","latRemain",]*0.2))+floor(quadrants["intmQuadrant1","lonRemain",]*0.2)+1 + quadrants["intmQuadrant2","latDigit",] <- floor(quadrants["intmQuadrant1","latRemain",]) + quadrants["intmQuadrant2","lonDigit",] <- floor(quadrants["intmQuadrant1","lonRemain",]) + quadrants["intmQuadrant2","latRemain",] <- round((quadrants["intmQuadrant1","latRemain",]-quadrants["intmQuadrant2","latDigit",])*10,7) + quadrants["intmQuadrant2","lonRemain",] <- round((quadrants["intmQuadrant1","lonRemain",]-quadrants["intmQuadrant2","lonDigit",])*10,7) + quadrants["intmQuadrant2","code",] <- quadrants["intmQuadrant2","quadrantDigit",]*100+quadrants["intmQuadrant2","latDigit",]*10+quadrants["intmQuadrant2","lonDigit",] + + quadrants["intmQuadrant3","quadrantDigit",] <- (2*floor(quadrants["intmQuadrant2","latRemain",]*0.2))+floor(quadrants["intmQuadrant2","lonRemain",]*0.2)+1 + quadrants["intmQuadrant3","latDigit",] <- floor(quadrants["intmQuadrant2","latRemain",]) + quadrants["intmQuadrant3","lonDigit",] <- floor(quadrants["intmQuadrant2","lonRemain",]) + quadrants["intmQuadrant3","latRemain",] <- round((quadrants["intmQuadrant2","latRemain",]-quadrants["intmQuadrant3","latDigit",])*10,7) + quadrants["intmQuadrant3","lonRemain",] <- round((quadrants["intmQuadrant2","lonRemain",]-quadrants["intmQuadrant3","lonDigit",])*10,7) + quadrants["intmQuadrant3","code",] <- quadrants["intmQuadrant3","quadrantDigit",]*100+quadrants["intmQuadrant3","latDigit",]*10+quadrants["intmQuadrant3","lonDigit",] + + if(degrees == 10) CSquareCodes <- quadrants["globalQuadrant","code",] + if(degrees == 5) CSquareCodes <- 
paste(quadrants["globalQuadrant","code",],":",quadrants["intmQuadrant1","quadrantDigit",],sep="") + if(degrees == 1) CSquareCodes <- paste(quadrants["globalQuadrant","code",],":",quadrants["intmQuadrant1","code",],sep="") + if(degrees == 0.5) CSquareCodes <- paste(quadrants["globalQuadrant","code",],":",quadrants["intmQuadrant1","code",],":",quadrants["intmQuadrant2","quadrantDigit",],sep="") + if(degrees == 0.1) CSquareCodes <- paste(quadrants["globalQuadrant","code",],":",quadrants["intmQuadrant1","code",],":",quadrants["intmQuadrant2","code",],sep="") + if(degrees == 0.05) CSquareCodes <- paste(quadrants["globalQuadrant","code",],":",quadrants["intmQuadrant1","code",],":",quadrants["intmQuadrant2","code",],":",quadrants["intmQuadrant3","quadrantDigit",],sep="") + if(degrees == 0.01) CSquareCodes <- paste(quadrants["globalQuadrant","code",],":",quadrants["intmQuadrant1","code",],":",quadrants["intmQuadrant2","code",],":",quadrants["intmQuadrant3","code",],sep="") + +return(CSquareCodes)} diff --git a/vmstools/R/CSquare2LonLat.r b/vmstools/R/CSquare2LonLat.r index 1e5ef48..a2733a3 100644 --- a/vmstools/R/CSquare2LonLat.r +++ b/vmstools/R/CSquare2LonLat.r @@ -1,48 +1,72 @@ -CSquare2LonLat <- function(csqr,degrees){ - -ra <- 1e-6 #Artificial number to add for rounding of 5'ves (round(0.5) = 0, but in Excel (where conversion comes from, it is 1) -chars <- an(nchar(csqr)) -tensqd <- an(substr(csqr,1,1))+ra; gqlat <- (round(abs(tensqd-4)*2/10)*10/5)-1 -tenslatd <- an(substr(csqr,2,2))+ra; gqlon <- (2*round(tensqd/10)-1)*-1 -tenslond <- an(substr(csqr,3,4))+ra -unitsqd <- an(substr(csqr,6,6))+ra; iqulat <- round(unitsqd*2/10) -unitslatd <- an(substr(csqr,7,7))+ra; iqulon <- (round((unitsqd-1)/2,1) - floor((unitsqd-1)/2))*2 -unitslond <- an(substr(csqr,8,8))+ra -tenthsqd <- an(substr(csqr,10,10))+ra; iqtlat <- round(tenthsqd*2/10) -tenthslatd <- an(substr(csqr,11,11))+ra; iqtlon <- (round((tenthsqd-1)/2,1) - floor((tenthsqd-1)/2))*2 -tenthslond <- 
an(substr(csqr,12,12))+ra -hundqd <- an(substr(csqr,14,14))+ra; iqhlat <- round(hundqd*2/10) -hundlatd <- an(substr(csqr,15,15))+ra; iqhlon <- (round((hundqd-1)/2,1) - floor((hundqd-1)/2))*2 -hundlond <- an(substr(csqr,16,16))+ra -reso <- 10^(1-floor((chars-4)/4))-((round((chars-4)/4,1)-floor((chars-4)/4))*10^(1-floor((chars-4)/4))) - -if(degrees < reso[1]) stop("Returning degrees is smaller than format of C-square") -if(degrees == 10){ - lat <- ((tenslatd*10)+5)*gqlat-ra - lon <- ((tenslond*10)+5)*gqlon-ra -} -if(degrees == 5){ - lat <- ((tenslatd*10)+(iqulat*5)+2.5)*gqlat-ra - lon <- ((tenslond*10)+(iqulon*5)+2.5)*gqlon-ra -} -if(degrees == 1){ - lat <- ((tenslatd*10)+ unitslatd+0.5)*gqlat-ra - lon <- ((tenslond*10)+ unitslond+0.5)*gqlon-ra -} -if(degrees == 0.5){ - lat <- ((tenslatd*10)+ unitslatd + (iqtlat*0.5)+0.25)*gqlat-ra - lon <- ((tenslond*10)+ unitslond + (iqtlon*0.5)+0.25)*gqlon-ra -} -if(degrees == 0.1){ - lat <- ((tenslatd*10)+ unitslatd + (tenthslatd*0.1)+0.05)*gqlat-ra - lon <- ((tenslond*10)+ unitslond + (tenthslond*0.1)+0.05)*gqlon-ra -} -if(degrees == 0.05){ - lat <- ((tenslatd*10)+ unitslatd + (tenthslatd*0.1)+(iqhlat*0.05)+0.025)*gqlat-ra - lon <- ((tenslond*10)+ unitslond + (tenthslond*0.1)+(iqhlon*0.05)+0.025)*gqlon-ra -} -if(degrees == 0.01){ - lat <- ((tenslatd*10)+ unitslatd + (tenthslatd*0.1)+(hundlatd*0.01)+0.005)*gqlat-ra - lon <- ((tenslond*10)+ unitslond + (tenthslond*0.1)+(hundlond*0.01)+0.005)*gqlon-ra -} -return(data.frame(SI_LATI=lat,SI_LONG=lon))} +#' Calculate the GPS notation from the CSquare notation +#' +#' Convert the CSquare notation to GPS location where you can define what +#' resolution you want the GPS notation to be. +#' +#' +#' @param csqr CSquare notation like: 1400:134:120:356 +#' @param degrees Degrees resolution of returned GPS notation : 10, 5, 1, 0.5, +#' 0.1, 0.05, 0.01 +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{km2Degree}}, \code{\link{degree2Km}}, +#' \code{\link{lonLatRatio}}, \code{\link{distance}} +#' @references EU Lot 2 project, based on CSquare: +#' http://www.marine.csiro.au/csquares/ +#' @examples +#' +#' data(tacsat) +#' tacsat$CSquare <- CSquare(tacsat$SI_LONG,tacsat$SI_LATI,0.05) +#' tacsat$SI_LONG2 <- CSquare2LonLat(tacsat$CSquare,0.5)$SI_LONG +#' tacsat$SI_LATI2 <- CSquare2LonLat(tacsat$CSquare,0.5)$SI_LATI +#' tacsat$CSquare2 <- CSquare(tacsat$SI_LONG2,tacsat$SI_LATI2,1) +#' head(tacsat[which(tacsat$CSquare != tacsat$CSquare2),]) +#' +#' @export CSquare2LonLat +CSquare2LonLat <- function(csqr,degrees){ + +ra <- 1e-6 #Artificial number to add for rounding of 5'ves (round(0.5) = 0, but in Excel (where conversion comes from, it is 1) +chars <- an(nchar(csqr)) +tensqd <- an(substr(csqr,1,1))+ra; gqlat <- (round(abs(tensqd-4)*2/10)*10/5)-1 +tenslatd <- an(substr(csqr,2,2))+ra; gqlon <- (2*round(tensqd/10)-1)*-1 +tenslond <- an(substr(csqr,3,4))+ra +unitsqd <- an(substr(csqr,6,6))+ra; iqulat <- round(unitsqd*2/10) +unitslatd <- an(substr(csqr,7,7))+ra; iqulon <- (round((unitsqd-1)/2,1) - floor((unitsqd-1)/2))*2 +unitslond <- an(substr(csqr,8,8))+ra +tenthsqd <- an(substr(csqr,10,10))+ra; iqtlat <- round(tenthsqd*2/10) +tenthslatd <- an(substr(csqr,11,11))+ra; iqtlon <- (round((tenthsqd-1)/2,1) - floor((tenthsqd-1)/2))*2 +tenthslond <- an(substr(csqr,12,12))+ra +hundqd <- an(substr(csqr,14,14))+ra; iqhlat <- round(hundqd*2/10) +hundlatd <- an(substr(csqr,15,15))+ra; iqhlon <- (round((hundqd-1)/2,1) - floor((hundqd-1)/2))*2 +hundlond <- an(substr(csqr,16,16))+ra +reso <- 10^(1-floor((chars-4)/4))-((round((chars-4)/4,1)-floor((chars-4)/4))*10^(1-floor((chars-4)/4))) + +if(degrees < reso[1]) stop("Returning degrees is smaller than format of C-square") +if(degrees == 10){ + lat <- ((tenslatd*10)+5)*gqlat-ra + lon <- ((tenslond*10)+5)*gqlon-ra +} +if(degrees == 5){ + lat <- ((tenslatd*10)+(iqulat*5)+2.5)*gqlat-ra + lon <- 
((tenslond*10)+(iqulon*5)+2.5)*gqlon-ra +} +if(degrees == 1){ + lat <- ((tenslatd*10)+ unitslatd+0.5)*gqlat-ra + lon <- ((tenslond*10)+ unitslond+0.5)*gqlon-ra +} +if(degrees == 0.5){ + lat <- ((tenslatd*10)+ unitslatd + (iqtlat*0.5)+0.25)*gqlat-ra + lon <- ((tenslond*10)+ unitslond + (iqtlon*0.5)+0.25)*gqlon-ra +} +if(degrees == 0.1){ + lat <- ((tenslatd*10)+ unitslatd + (tenthslatd*0.1)+0.05)*gqlat-ra + lon <- ((tenslond*10)+ unitslond + (tenthslond*0.1)+0.05)*gqlon-ra +} +if(degrees == 0.05){ + lat <- ((tenslatd*10)+ unitslatd + (tenthslatd*0.1)+(iqhlat*0.05)+0.025)*gqlat-ra + lon <- ((tenslond*10)+ unitslond + (tenthslond*0.1)+(iqhlon*0.05)+0.025)*gqlon-ra +} +if(degrees == 0.01){ + lat <- ((tenslatd*10)+ unitslatd + (tenthslatd*0.1)+(hundlatd*0.01)+0.005)*gqlat-ra + lon <- ((tenslond*10)+ unitslond + (tenthslond*0.1)+(hundlond*0.01)+0.005)*gqlon-ra +} +return(data.frame(SI_LATI=lat,SI_LONG=lon))} diff --git a/vmstools/R/ICESarea.r b/vmstools/R/ICESarea.r index 72a153d..44f8813 100644 --- a/vmstools/R/ICESarea.r +++ b/vmstools/R/ICESarea.r @@ -1,3 +1,27 @@ +#' Get ICES area from coordinates +#' +#' Get the ICES area from any lon,lat position, given that this position is +#' within the ICES region. +#' +#' +#' @param tacsat dataframe given that they have 'SI_LONG' and 'SI_LATI' columns +#' (either tacsat format or other dataset with SI_LONG and SI_LATI columns) +#' @param areas ICES areas as SpatialPolygons +#' @param proj4string Projection string, default to NULL. +#' @param fast If memory allocation is not a problem, a faster version can be +#' switched on +#' @return Returns the areas as a vector +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{ICESrectangle}}, \code{\link{ICESrectangle2LonLat}} +#' @references EU Lot 2 project +#' @examples +#' +#' data(ICESareas) +#' res <- data.frame(SI_LONG = c(1,2,2,4,2), +#' SI_LATI = c(53,53.2,54,56.7,55.2)) +#' areas <- ICESarea(res,ICESareas) +#' +#' @export ICESarea ICESarea <- function(tacsat,areas,proj4string=NULL,fast=FALSE){ require(sp) if(!class(areas) %in% c("SpatialPolygons","SpatialPolygonsDataFrame")) stop("'areas' must be specified as class 'SpatialPolygons' or 'SpatialPolygonsDataFrame'") diff --git a/vmstools/R/ICESrectangle.r b/vmstools/R/ICESrectangle.r index 82e1b02..89dd2f7 100644 --- a/vmstools/R/ICESrectangle.r +++ b/vmstools/R/ICESrectangle.r @@ -1,23 +1,42 @@ -ICESrectangle <- function(dF){ - rectChar1n2 <- sprintf("%02i",as.integer(2 * (dF[, "SI_LATI"] - 35.5))) - rectChar3 <- ifelse(dF[, "SI_LONG"] > -50 & dF[, "SI_LONG"]<= -40, "A", - ifelse(dF[, "SI_LONG"] > -40 & dF[, "SI_LONG"]<= -30, "B", - ifelse(dF[, "SI_LONG"] > -30 & dF[, "SI_LONG"]<= -20, "C", - ifelse(dF[, "SI_LONG"] > -20 & dF[, "SI_LONG"]<= -10, "D", - ifelse(dF[, "SI_LONG"] > -10 & dF[, "SI_LONG"]< 0, "E", - #-Note that at 0 meridian the allocation of points at the meridian switch - ifelse(dF[, "SI_LONG"] >= 0 & dF[, "SI_LONG"]< 10, "F", - ifelse(dF[, "SI_LONG"] >= 10 & dF[, "SI_LONG"]< 20, "G", - ifelse(dF[, "SI_LONG"] >= 20 & dF[, "SI_LONG"]< 30, "H", "J")))))))) - rectChar4 <- rep(NA,nrow(dF)) - idxlowzero <- which(dF[,"SI_LONG"] < 0) - idxabozero <- which(dF[,"SI_LONG"] >= 0) - if(length(idxlowzero)>0) rectChar4[idxlowzero] <- ceiling(dF[idxlowzero,"SI_LONG"] %% 10 -1 + 10)%%10 - if(length(idxabozero)>0) rectChar4[idxabozero] <- floor(dF[idxabozero,"SI_LONG"] %% 10) - rectID <- paste(rectChar1n2, rectChar3, rectChar4, sep = "") - return(rectID)} - - -#dF <- as.data.frame(cbind(SI_LONG = seq(-20,-9,0.5),SI_LATI = rep(54,length(seq(-20,-9,0.5))))) -#dF <- as.data.frame(cbind(SI_LONG = seq(0,12,0.5),SI_LATI = rep(54,length(seq(0,12,0.5))))) 
-#dF$ICES <- ICESrectangle(dF) \ No newline at end of file +#' Get ICES rectangle from coordinates +#' +#' Get the ICES rectangle from any lon,lat position, given that this position +#' is within the ICES region. +#' +#' +#' @param dF dataframe given that they have 'SI_LONG' and 'SI_LATI' columns +#' @return Returns the rectangles as a vector +#' @author Neil Campbell +#' @seealso \code{\link{ICESarea}}, \code{\link{ICESrectangle2LonLat}} +#' @references EU Lot 2 project +#' @examples +#' +#' res <- data.frame(SI_LONG = c(1,2,2,4,2), +#' SI_LATI = c(53,53.2,54,56.7,55.2)) +#' ICESrectangle(res) +#' +#' +#' @export ICESrectangle +ICESrectangle <- function(dF){ + rectChar1n2 <- sprintf("%02i",as.integer(2 * (dF[, "SI_LATI"] - 35.5))) + rectChar3 <- ifelse(dF[, "SI_LONG"] > -50 & dF[, "SI_LONG"]<= -40, "A", + ifelse(dF[, "SI_LONG"] > -40 & dF[, "SI_LONG"]<= -30, "B", + ifelse(dF[, "SI_LONG"] > -30 & dF[, "SI_LONG"]<= -20, "C", + ifelse(dF[, "SI_LONG"] > -20 & dF[, "SI_LONG"]<= -10, "D", + ifelse(dF[, "SI_LONG"] > -10 & dF[, "SI_LONG"]< 0, "E", + #-Note that at 0 meridian the allocation of points at the meridian switch + ifelse(dF[, "SI_LONG"] >= 0 & dF[, "SI_LONG"]< 10, "F", + ifelse(dF[, "SI_LONG"] >= 10 & dF[, "SI_LONG"]< 20, "G", + ifelse(dF[, "SI_LONG"] >= 20 & dF[, "SI_LONG"]< 30, "H", "J")))))))) + rectChar4 <- rep(NA,nrow(dF)) + idxlowzero <- which(dF[,"SI_LONG"] < 0) + idxabozero <- which(dF[,"SI_LONG"] >= 0) + if(length(idxlowzero)>0) rectChar4[idxlowzero] <- ceiling(dF[idxlowzero,"SI_LONG"] %% 10 -1 + 10)%%10 + if(length(idxabozero)>0) rectChar4[idxabozero] <- floor(dF[idxabozero,"SI_LONG"] %% 10) + rectID <- paste(rectChar1n2, rectChar3, rectChar4, sep = "") + return(rectID)} + + +#dF <- as.data.frame(cbind(SI_LONG = seq(-20,-9,0.5),SI_LATI = rep(54,length(seq(-20,-9,0.5))))) +#dF <- as.data.frame(cbind(SI_LONG = seq(0,12,0.5),SI_LATI = rep(54,length(seq(0,12,0.5))))) +#dF$ICES <- ICESrectangle(dF) diff --git a/vmstools/R/ICESrectangle2LonLat.r 
b/vmstools/R/ICESrectangle2LonLat.r index 3bc41ff..24a264f 100644 --- a/vmstools/R/ICESrectangle2LonLat.r +++ b/vmstools/R/ICESrectangle2LonLat.r @@ -1,47 +1,70 @@ -ICESrectangle2LonLat <- -function (statsq,midpoint=FALSE) -{ - #Split the code into its respective parts - latpart <- substr(statsq, 1, 2) - lonpart <- substr(statsq, 3, 4) - - #Handle the latitude first, as its easier - #The grid increments continuously from - #south to north in 0.5 degree intervals - latlabels <- sprintf("%02i",1:99) - lat.mids <- seq(36, 85, 0.5) + 0.25 - lat.idx <- match(latpart, latlabels) - lat <- lat.mids[lat.idx] - - #The longtitudinal structure is not so easy - #There are three main traps: - # - A4-A9 do not exist - # - There are no I squares - # - Grid ends at M8 - lonlabels <- paste(rep(LETTERS[c(2:8,10:13)], each=10), rep(0:9, - 7), sep = "") - lonlabels <- c("A0","A1","A2","A3",lonlabels[-length(lonlabels)]) - lon.mids <- -44:68 + 0.5 - lon.idx <- match(lonpart, lonlabels) - lon <- lon.mids[lon.idx] - - #Check whether it worked - #If any part of the code is not recognised, both - #lon and lat should be NA - failed.codes <- is.na(lat) | is.na(lon) - if (any(failed.codes)) { - warning("Some stat squares are not valid. Please check the help files for ICESrectangle2LonLat() for more information about the formal definition of valid ICES rectangles.") - lat[failed.codes] <- NA - lon[failed.codes] <- NA - } - - #Correct for midpoints - if(midpoint == FALSE){ - - lat <- lat - 0.25 - lon <- lon - 0.5 - - } - #Done - return(data.frame(SI_LATI=lat,SI_LONG=lon)) -} +#' Convert the ICES rectangle to coordinates +#' +#' Convert the ICES rectangle to the longitude and latitude position of that +#' specific rectangle, with either midpoint or lower-left corner. 
+#' +#' +#' @param statsq character vector of ICES rectangles +#' @param midpoint logical statement if returned values need to be midpoints of +#' rectangle or not (default) +#' @return Returns dataframe with longitude and latitude columns +#' @author Neil Campbell, Mark Payne +#' @seealso \code{\link{ICESarea}}, \code{\link{ICESrectangle}} +#' @references EU Lot 2 project +#' @examples +#' +#' res <- data.frame(SI_LONG = c(1,2,2,4,2), +#' SI_LATI = c(53,53.2,54,56.7,55.2)) +#' rects <- ICESrectangle(res) +#' midpoints <- ICESrectangle2LonLat(rects,midpoint=TRUE) +#' corners <- ICESrectangle2LonLat(rects,midpoint=FALSE) +#' +#' +#' @export ICESrectangle2LonLat +ICESrectangle2LonLat <- +function (statsq,midpoint=FALSE) +{ + #Split the code into its respective parts + latpart <- substr(statsq, 1, 2) + lonpart <- substr(statsq, 3, 4) + + #Handle the latitude first, as its easier + #The grid increments continuously from + #south to north in 0.5 degree intervals + latlabels <- sprintf("%02i",1:99) + lat.mids <- seq(36, 85, 0.5) + 0.25 + lat.idx <- match(latpart, latlabels) + lat <- lat.mids[lat.idx] + + #The longtitudinal structure is not so easy + #There are three main traps: + # - A4-A9 do not exist + # - There are no I squares + # - Grid ends at M8 + lonlabels <- paste(rep(LETTERS[c(2:8,10:13)], each=10), rep(0:9, + 7), sep = "") + lonlabels <- c("A0","A1","A2","A3",lonlabels[-length(lonlabels)]) + lon.mids <- -44:68 + 0.5 + lon.idx <- match(lonpart, lonlabels) + lon <- lon.mids[lon.idx] + + #Check whether it worked + #If any part of the code is not recognised, both + #lon and lat should be NA + failed.codes <- is.na(lat) | is.na(lon) + if (any(failed.codes)) { + warning("Some stat squares are not valid. 
Please check the help files for ICESrectangle2LonLat() for more information about the formal definition of valid ICES rectangles.") + lat[failed.codes] <- NA + lon[failed.codes] <- NA + } + + #Correct for midpoints + if(midpoint == FALSE){ + + lat <- lat - 0.25 + lon <- lon - 0.5 + + } + #Done + return(data.frame(SI_LATI=lat,SI_LONG=lon)) +} diff --git a/vmstools/R/N1p0.R b/vmstools/R/N1p0.R index 04650b2..9126155 100644 --- a/vmstools/R/N1p0.R +++ b/vmstools/R/N1p0.R @@ -1,6 +1,35 @@ -`N1p0` <- -function(x,mu,sig,p){ - aa<- c((1/(sig*sqrt(2*pi)))*exp(-((x-(mu+p))^2)/(2*sig^2))) - return(aa) - } - +#' 2-dimensional Gaussian distribution with two-peak possibility +#' +#' Computes the 2-dimensional Gaussian distribution depending on the distance +#' from the mean, and standard deviation with adjustable mean and shift from +#' mean into two peaks instead of 1 +#' +#' +#' @param x Observed value, deviance from mean +#' @param mu Mean value +#' @param sig Standard deviation of value +#' @param p Shift from mean into two peaks +#' @note Function is called inside plotCIinterpolation() +#' @author Niels T. Hintzen +#' @references EU lot 2 project +#' @examples +#' +#' +#' x <- matrix(c(5,4,3,4,5,4,3,2,3,4,3,2,1,2,3, +#' 3,2,1,2,3,4,3,2,3,4,5,4,3,4,5),nrow=6,ncol=5,byrow=TRUE) +#' mu <- 0 +#' sig <- x^0.5 +#' p <- 0 +#' +#' res <- matrix(N1p0(x,mu,sig,p),nrow=6,ncol=5) +#' +#' #Plot the results +#' image(res) +#' +#' @export N1p0 +`N1p0` <- +function(x,mu,sig,p){ + aa<- c((1/(sig*sqrt(2*pi)))*exp(-((x-(mu+p))^2)/(2*sig^2))) + return(aa) + } + diff --git a/vmstools/R/ac.R b/vmstools/R/ac.R index 2d2063c..b5a59d5 100644 --- a/vmstools/R/ac.R +++ b/vmstools/R/ac.R @@ -1,4 +1,20 @@ -`ac` <- -function(x){return(as.character(x))} - - # Hello world ! 
\ No newline at end of file +#' shortcut for as.character +#' +#' Change the class of an object to character +#' +#' +#' @param x object to turn into character +#' @return as.character attempts to coerce its argument to character type +#' @author Niels T. Hintzen +#' @seealso \code{\link{as.character}} +#' @references EU Lot 2 project +#' @examples +#' +#' as.character(5) #returns the number 5 as class 'character' +#' ac(5) #returns the number 5 also as class 'character' +#' +#' @export ac +`ac` <- +function(x){return(as.character(x))} + + # Hello world ! diff --git a/vmstools/R/activityTacsat.r b/vmstools/R/activityTacsat.r index 8d5a8a8..c413aa0 100644 --- a/vmstools/R/activityTacsat.r +++ b/vmstools/R/activityTacsat.r @@ -1,421 +1,488 @@ -activityTacsat <- function(tacsat,units="year",analyse.by="LE_GEAR",storeScheme=NULL,plot=FALSE,level="all"){ - - require("mixtools") - if (!"SI_DATIM" %in% colnames(tacsat)) - tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, - sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M") - if(!analyse.by %in% c("LE_GEAR","VE_REF")) stop("Analysing only by gear or vessel") - - #- Make subset for only those tacsat records that have speed - tacsat$ID <- 1:nrow(tacsat) - tacsat$SI_STATE <- NA - tacsatOrig <- tacsat - idx <- which(is.na(tacsat$SI_SP)==FALSE) - tacsat <- tacsat[idx,] - - #- If sigma is NULL it needs to be estimated and gets a variable name - storeScheme$sigma0[which(storeScheme$sigma0==0)] <- "d" - - if(units == "all"){ yrs <- 0; mths <- 0; wks <- 0} - if(units == "year"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); mths <- 0; wks <- 0} - if(units == "month"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); mths <- an(sort(unique(format(tacsat$SI_DATIM,"%m")))); wks <- 0} - if(units == "week"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); wks <- an(sort(unique(format(tacsat$SI_DATIM,"%W"))))+1; mths <- 0} - - runScheme <- expand.grid(years=yrs,months=mths,weeks=wks) - - 
#----------------------------------------------------------------------------- - # Start run for all combinations of gear / vessel and units - #----------------------------------------------------------------------------- - - for(iRun in 1:nrow(runScheme)){ - yr <- runScheme[iRun,"years"] - mth <- runScheme[iRun,"months"] - wk <- runScheme[iRun,"weeks"] - if(nrow(runScheme)==1){ sTacsat <- tacsat - } else { - if(mth == 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr) - if(mth == 0 & wk != 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & (an(format( tacsat$SI_DATIM,"%W"))+1) == wk) - if(mth != 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & format(tacsat$SI_DATIM,"%m") == mth) - } - if(plot==TRUE) x11(); - #--------------------------------------------------------------------------- - #- Analyses when gear info is supplied LE_GEAR - #--------------------------------------------------------------------------- - if("LE_GEAR" %in% colnames(tacsat) & analyse.by == "LE_GEAR"){ - - gearList <- names(which((rowSums(table(sTacsat$LE_GEAR,sTacsat$SI_SP)) - table(sTacsat$LE_GEAR,sTacsat$SI_SP)[,"0"])>40)) - - #- Mirror the tacsat dataset and make a selection - tyg <- subset(sTacsat,is.na(LE_GEAR) == FALSE & LE_GEAR %in% gearList); tygmr <- tyg; tygmr$SI_SP <- -1* tygmr$SI_SP; tygmr <- rbind(tyg,tygmr) - tng <- subset(sTacsat,is.na(LE_GEAR) == TRUE | !LE_GEAR %in% gearList); tngmr <- tng; tngmr$SI_SP <- -1* tngmr$SI_SP; tngmr <- rbind(tng,tngmr) - - #------------------------------------------------------------------------- - #- Get general speed pattern by gear, use analysed number of kernals LE_GEAR + GENERIC - #------------------------------------------------------------------------- - res <- list() - for(iGr in unique(tyg$LE_GEAR)){ - - #- Get rid of very influential datapoints (lower their abundance) - tbl <- table(subset(tygmr,LE_GEAR==iGr)$SI_SP); - spd <- an(names(rev(sort(tbl))[1])) - idx <- 
which(subset(tygmr,LE_GEAR==iGr)$SI_SP==spd) - nxt <- ifelse(names(rev(sort(tbl))[1])==ac(spd),ifelse(abs(an(names(rev(sort(tbl))[2])))==abs(spd),names(rev(sort(tbl))[3]),names(rev(sort(tbl))[2])),names(rev(sort(tbl))[1])) - if(tbl[ac(spd)]/tbl[nxt] > 5){ - idx <- sample(idx,tbl[ac(spd)]-tbl[nxt]*2,replace=FALSE) - if(length(which(abs(an(names(tbl))) %in% spd))>1) idx <- c(idx,sample(which(subset(tygmr,LE_GEAR==iGr)$SI_SP==(-1*spd)),tbl[ac(-1*spd)]-tbl[nxt]*2,replace=FALSE)) - } else { idx <- -1:-nrow(subset(tygmr,LE_GEAR==iGr))} - - #----------------------------------------------------------------------------- - # Fit the 3 or 5 normal distributions. If parameter guestimates are - # available, then use these - #----------------------------------------------------------------------------- - if(is.null(storeScheme)==TRUE){ - res[[iGr]] <- try(normalmixEM(subset(tygmr,LE_GEAR==iGr)$SI_SP[-idx],maxit=1000,k=5,maxrestarts=20,mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",0.911,"a","b"),sigma=rep(1,5))) - } else { - #- Fitting model when mean values of peaks has been defined - if("means" %in% colnames(storeScheme)){ - - #- Extract parameters from storeScheme - ss <- storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by == iGr),"means"] - sigma <- anf(storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by == iGr),"sigma0"]) - fixPeaks <- ac( storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by == iGr),"fixPeaks"]) - - #- Setup parameter estimate vectors for mu and sigma - if(length(c(na.omit(as.numeric(strsplit(ss," ")[[1]]))))==3){ constraintmn <- c("-a",0,"a") } else { constraintmn <- c("-b","-a",0,"a","b")} - if(length(c(na.omit(as.numeric(strsplit(ss," ")[[1]]))))==3){ constraintsd <- c("a","b","a")} else { constraintsd <- c("b","a",sigma,"a","b")} - 
if(fixPeaks) constraintmn <- c(na.omit(anf(unlist(strsplit(ss," "))))) - - #- Fit the actual model through the normalmixEM function - res[[iGr]] <- try(normalmixEM(subset(tygmr,LE_GEAR==iGr)$SI_SP[-idx],maxit=1000,mu=c(na.omit(as.numeric(strsplit(ss," ")[[1]]))), sigma=rep(1,length(constraintsd)), - maxrestarts=20,mean.constr=constraintmn,sd.constr=constraintsd)) - } else { - #- Fitting model when number of peaks has been defined - - #- Extract parameters from storeScheme - ss <- storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by == iGr),"peaks"] - sigma <- anf(storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by == iGr),"sigma0"]) - fixPeaks <- ac( storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by == iGr),"fixPeaks"]) - - #- Setup parameter estimate vectors for mu and sigma - if(ss==3){ constraintmn <- c("-a",0,"a") } else { constraintmn <- c("-b","-a",0,"a","b")} - if(ss==3){ constraintsd <- c("a","b","a")} else { constraintsd <- c("b","a",sigma,"a","b")} - if(length(ss)>0){ - - #- Fit the actual model through the normalmixEM function - if(is.na(ss)==TRUE) res[[iGr]] <- try(normalmixEM(subset(tygmr,LE_GEAR==iGr)$SI_SP[-idx],maxit=1000,k=5, maxrestarts=20, mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",sigma,"a","b"),sigma=rep(1,5))) - if(is.na(ss)==FALSE) res[[iGr]] <- try(normalmixEM(subset(tygmr,LE_GEAR==iGr)$SI_SP[-idx],maxit=1000,k=ss,maxrestarts=20, mean.constr=constraintmn, sd.constr=constraintsd, sigma=rep(1,length(constraintsd)))) - } else { res[[iGr]] <- try(normalmixEM(subset(tygmr,LE_GEAR==iGr)$SI_SP[-idx],maxit=1000,k=5, maxrestarts=20, mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",sigma,"a","b"),sigma=rep(1,5)))} - } - } - if(plot==TRUE) plot(res[[iGr]],2,breaks=100,xlim=c(-20,20)) - } - if(level == "vessel"){ - #- 
Transform the output into the right format - for(iGr in unique(tyg$LE_GEAR)) - if(!class(res[[iGr]]) == "try-error"){ res[[iGr]] <- res[[iGr]]$mu } else { res[[iGr]] <- rep(NA,5)} - res <- lapply(res,function(x){if(class(x)=="try-error"){x<-rep(NA,5)}else{x}}) - res <- lapply(res,sort) - } - - if(level == "vessel"){ - #------------------------------------------------------------------------- - #- Perform analyses per vessel with gear LE_GEAR + VE_REF - #------------------------------------------------------------------------- - - if(nrow(tygmr)>40) - shipList <- names(which((rowSums(table(tygmr$VE_REF,tygmr$SI_SP)) - table(tygmr$VE_REF,tygmr$SI_SP)[,"0"])>20)) - shipFit <- list() - if(exists("shipList")){ - for(iShip in shipList){ - - #- Get rid of very influential datapoints (lower their abundance) - tbl <- table(subset(tygmr,VE_REF==iShip)$SI_SP); - spd <- an(names(rev(sort(tbl))[1])) - idx <- which(subset(tygmr,VE_REF==iShip)$SI_SP==spd) - nxt <- ifelse(names(rev(sort(tbl))[1])==ac(spd),ifelse(abs(an(names(rev(sort(tbl))[2])))==abs(spd),names(rev(sort(tbl))[3]),names(rev(sort(tbl))[2])),names(rev(sort(tbl))[1])) - if(tbl[ac(spd)]/tbl[nxt] > 5){ - idx <- sample(idx,tbl[ac(spd)]-tbl[nxt]*2,replace=FALSE) - if(length(which(abs(an(names(tbl))) %in% spd))>1) idx <- c(idx,sample(which(subset(tygmr,VE_REF==iShip)$SI_SP==(-1*spd)),tbl[ac(-1*spd)]-tbl[nxt]*2,replace=FALSE)) - } else { idx <- -1:-nrow(subset(tygmr,VE_REF==iShip))} - - shipTacsat <- subset(tygmr,VE_REF == iShip) - - #----------------------------------------------------------------------------- - # Fit the 3 or 5 normal distributions. 
If parameter guestimates are - # available, then use these - #----------------------------------------------------------------------------- - - #- Setup parameter estimate vectors for mu and sigma - if(length(res[[names(which.max(table(shipTacsat$LE_GEAR)))]])==3){ constraintmn <- c("-a",0,"a")} else { constraintmn <- c("-b","-a",0,"a","b")} - if(length(res[[names(which.max(table(shipTacsat$LE_GEAR)))]])==3){ constraintsd <- c("a","b","a")}else { constraintsd <- c("b","a",0.911,"a","b")} - - #- Fit the actual model through the normalmixEM function - shipFit[[iShip]] <- try(normalmixEM(shipTacsat$SI_SP[-idx],mu=res[[names(which.max(table(shipTacsat$LE_GEAR)))]],maxit=2000, - sigma=rep(1,length(constraintsd)),mean.constr=constraintmn,sd.constr=constraintsd)) - - if(class(shipFit[[iShip]])!= "try-error"){ - - #- Analyse the fit and turn it into a result of fishing - no fishing - #mu <- sort.int(shipFit[[iShip]]$mu,index.return=TRUE) - #sds <- shipFit[[iShip]]$sigma[mu$ix]; mu <- mu$x - mu <- shipFit[[iShip]]$mu - sds <- shipFit[[iShip]]$sigma - - probs <- dnorm(x=shipTacsat$SI_SP,mean=mu[ceiling(length(mu)/2)],sd=sds[ceiling(length(mu)/2)]) - for(i in (ceiling(length(mu)/2)+1):length(mu)) probs <- cbind(probs,dnorm(x=shipTacsat$SI_SP,mean=mu[i],sd=sds[i])) - SI_STATE <- apply(probs,1,which.max) - - if(length(mu)==3){ - SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("f","s"); SI_STATE <- ac(SI_STATE)} - if(length(mu)==5){ - SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("h","f","s"); SI_STATE <- ac(SI_STATE)} - tacsat$SI_STATE[which(tacsat$ID %in% shipTacsat$ID)] <- SI_STATE[1:(length(SI_STATE)/2)] - } else { tacsat$SI_STATE[which(tacsat$ID %in% shipTacsat$ID)] <- NA} - } - } - } else { - for(iGr in unique(tyg$LE_GEAR)){ - if(!class(res[[iGr]]) == "try-error"){ - - #- Analyse the fit and turn it into a result of fishing - no fishing - #mu <- sort.int(res[[iGr]]$mu,index.return=TRUE) - #sds <- res[[iGr]]$sigma[mu$ix]; mu <- mu$x - mu <- res[[iGr]]$mu - sds <- 
res[[iGr]]$sigma - probs <- dnorm(x=subset(tyg,LE_GEAR==iGr)$SI_SP,mean=mu[ceiling(length(mu)/2)],sd=sds[ceiling(length(mu)/2)]) - for(i in (ceiling(length(mu)/2)+1):length(mu)) probs <- cbind(probs,dnorm(x=subset(tyg,LE_GEAR==iGr)$SI_SP,mean=mu[i],sd=sds[i])) - SI_STATE <- apply(probs,1,which.max) - - if(length(mu)==3){ - SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("f","s"); SI_STATE <- ac(SI_STATE)} - if(length(mu)==5){ - SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("h","f","s"); SI_STATE <- ac(SI_STATE)} - tacsat$SI_STATE[which(tacsat$ID %in% subset(tyg,LE_GEAR == iGr)$ID)] <- SI_STATE - } - } - } - #------------------------------------------------------------------------- - #- Perform analyses per vessel without gear NO_GEAR + VE_REF - #------------------------------------------------------------------------- - if(nrow(tngmr)>40) - nonshipList <- names(which((rowSums(table(tngmr$VE_REF,tngmr$SI_SP)) - table(tngmr$VE_REF,tngmr$SI_SP)[,"0"])>20)) - nonshipFit <- list() - if(exists("nonshipList")){ - for(iShip in nonshipList){ - - #- Get rid of very influential datapoints (lower their abundance) - tbl <- table(subset(tngmr,VE_REF==iShip)$SI_SP); - spd <- an(names(rev(sort(tbl))[1])) - idx <- which(subset(tngmr,VE_REF==iShip)$SI_SP==spd) - nxt <- ifelse(names(rev(sort(tbl))[1])==ac(spd),ifelse(abs(an(names(rev(sort(tbl))[2])))==abs(spd),names(rev(sort(tbl))[3]),names(rev(sort(tbl))[2])),names(rev(sort(tbl))[1])) - if(tbl[ac(spd)]/tbl[nxt] > 5){ - idx <- sample(idx,tbl[ac(spd)]-tbl[nxt]*2,replace=FALSE) - if(length(which(abs(an(names(tbl))) %in% spd))>1) idx <- c(idx,sample(which(subset(tngmr,VE_REF==iShip)$SI_SP==(-1*spd)),tbl[ac(-1*spd)]-tbl[nxt]*2,replace=FALSE)) - } else { idx <- -1:-nrow(subset(tngmr,VE_REF==iShip))} - - #----------------------------------------------------------------------------- - # Fit the 3 or 5 normal distributions. 
If parameter guestimates are - # available, then use these - #----------------------------------------------------------------------------- - - shipTacsat <- subset(tngmr,VE_REF == iShip) - if(exists("shipFit")){ - if(iShip %in% names(shipFit)){ - - #- Setup parameter estimate vectors for mu and sigma - if(length(shipFit[[iShip]]$mu)==3){constraintmn <- c("-a",0,"a")} else { constraintmn <- c("-b","-a",0,"a","b")} - if(length(shipFit[[iShip]]$mu)==3){constraintsd <- c("a","b","a")}else { constraintsd <- c("b","a",0.911,"a","b")} - - #- Fit the actual model through the normalmixEM function - nonshipFit[[iShip]] <- try(normalmixEM(shipTacsat$SI_SP[-idx],k=length(shipFit[[iShip]]$mu),maxit=2000, - sigma=rep(1,length(constraintsd)),mean.constr=constraintmn,sd.constr=constraintsd)) - } else { - #- Fit the actual model through the normalmixEM function - nonshipFit[[iShip]] <- try(normalmixEM(shipTacsat$SI_SP[-idx],k=5,maxit=2000,mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",0.911,"a","b"),sigma=rep(1,5)))} - - if(!class(nonshipFit[[iShip]]) == "try-error"){ - - #- Analyse the fit and turn it into a result of fishing - no fishing - #mu <- sort.int(nonshipFit[[iShip]]$mu,index.return=TRUE) - #sds <- nonshipFit[[iShip]]$sigma[mu$ix]; mu <- mu$x - mu <- nonshipFit[[iShip]]$mu - sds <- nonshipFit[[iShip]]$sds - - probs <- dnorm(x=shipTacsat$SI_SP,mean=mu[ceiling(length(mu)/2)],sd=sds[ceiling(length(mu)/2)]) - for(i in (ceiling(length(mu)/2)+1):length(mu)) probs <- cbind(probs,dnorm(x=shipTacsat$SI_SP,mean=mu[i],sd=sds[i])) - SI_STATE <- apply(probs,1,which.max) - - if(length(mu)==3){ - SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("f","s"); SI_STATE <- ac(SI_STATE)} - if(length(mu)==5){ - SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("h","f","s"); SI_STATE <- ac(SI_STATE)} - tacsat$SI_STATE[which(tacsat$ID %in% shipTacsat$ID)] <- SI_STATE[1:(length(SI_STATE)/2)] - } - } - } - } - } - #--------------------------------------------------------------------------- - 
#- Analyses when vessel info is supplied VE_REF + ENOUGH # - #--------------------------------------------------------------------------- - if("VE_REF" %in% colnames(tacsat) & analyse.by == "VE_REF"){ - - vesselList <- names(which((rowSums(table(sTacsat$VE_REF,sTacsat$SI_SP)) - table(sTacsat$VE_REF,sTacsat$SI_SP)[,"0"])>40)) - - #- Mirror the tacsat dataset and make a selection - tyv <- subset(sTacsat,is.na(VE_REF) == FALSE & VE_REF %in% vesselList); tyvmr <- tyv; tyvmr$SI_SP <- -1* tyvmr$SI_SP; tyvmr <- rbind(tyv,tyvmr) - tnv <- subset(sTacsat,is.na(VE_REF) == TRUE | !VE_REF %in% vesselList); tnvmr <- tnv; tnvmr$SI_SP <- -1* tnvmr$SI_SP; tnvmr <- rbind(tnv,tnvmr) - - #- Perform analyses per vessel - if(nrow(tyv)>40) - shipList <- names(which((rowSums(table(tyvmr$VE_REF,tyvmr$SI_SP)) - table(tyvmr$VE_REF,tyvmr$SI_SP)[,"0"])>20)) - shipFit <- list() - if(exists("shipList")){ - for(iShip in shipList){ - - #- Get rid of very influential data points - tbl <- table(subset(tyvmr,VE_REF==iShip)$SI_SP); - spd <- an(names(rev(sort(tbl))[1])) - idx <- which(subset(tyvmr,VE_REF==iShip)$SI_SP==spd) - nxt <- ifelse(names(rev(sort(tbl))[1])==ac(spd),ifelse(abs(an(names(rev(sort(tbl))[2])))==abs(spd),names(rev(sort(tbl))[3]),names(rev(sort(tbl))[2])),names(rev(sort(tbl))[1])) - if(tbl[ac(spd)]/tbl[nxt] > 5){ - idx <- sample(idx,tbl[ac(spd)]-tbl[nxt]*2,replace=FALSE) - if(length(which(abs(an(names(tbl))) %in% spd))>1) idx <- c(idx,sample(which(subset(tyvmr,VE_REF==iShip)$SI_SP==(-1*spd)),tbl[ac(-1*spd)]-tbl[nxt]*2,replace=FALSE)) - } else { idx <- -1:-nrow(subset(tyvmr,VE_REF==iShip))} - - shipTacsat <- subset(tyvmr,VE_REF == iShip) - - #----------------------------------------------------------------------------- - # Fit the 3 or 5 normal distributions. 
If parameter guestimates are - # available, then use these - #----------------------------------------------------------------------------- - if(is.null(storeScheme)==TRUE){ - shipFit[[iShip]] <- try(normalmixEM(subset(tyvmr,VE_REF==iShip)$SI_SP[-idx],maxit=2000,k=5,mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",0.911,"a","b"),sigma=rep(1,5))) - } else { - #- Fitting model when mean values of peaks has been defined - if("means" %in% colnames(storeScheme)){ - - #- Extract parameters from storeScheme - ss <- storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by == iShip),"means"] - sigma <- anf(storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by == iShip),"sigma0"]) - fixPeaks <- ac( storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by == iShip),"fixPeaks"]) - - #- Setup parameter estimate vectors for mu and sigma - if(length(c(na.omit(as.numeric(strsplit(ss," ")[[1]]))))==3){ constraintmn <- c("-a",0,"a")} else { constraintmn <- c("-b","-a",0,"a","b")} - if(length(c(na.omit(as.numeric(strsplit(ss," ")[[1]]))))==3){ constraintsd <- c("a","b","a")}else { constraintsd <- c("b","a",sigma,"a","b")} - if(fixPeaks) constraintmn <- c(na.omit(anf(unlist(strsplit(ss," "))))) - - #- Fit the actual model through the normalmixEM function - shipFit[[iShip]] <- try(normalmixEM(subset(tyvmr,VE_REF==iShip)$SI_SP[-idx],mu=c(na.omit(as.numeric(strsplit(ss," ")[[1]]))), maxit=2000, - mean.constr=constraintmn,sd.constr=constraintsd,sigma=rep(1,length(constraintsd)))) - } else { - #- Fitting model when number of peaks has been defined - - #- Extract parameters from storeScheme - ss <- storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by == iShip),"peaks"] - sigma <- anf(storeScheme[which(storeScheme$years 
== yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by == iShip),"sigma0"]) - fixPeaks <- ac( storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by == iShip),"fixPeaks"]) - - #- Setup parameter estimate vectors for mu and sigma - if(ss==3){ constraintmn <- c("-a",0,"a")} else { constraintmn <- c("-b","-a",0,"a","b")} - if(ss==3){ constraintsd <- c("a","b","a")}else { constraintsd <- c("b","a",sigma,"a","b")} - if(length(ss)>0){ - - #- Fit the actual model through the normalmixEM function - if(is.na(ss)==TRUE) shipFit[[iShip]] <- try(normalmixEM(subset(tyvmr,VE_REF==iShip)$SI_SP[-idx],maxit=2000,k=5,mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",sigma,"a","b"),sigma=rep(1,5))) - if(is.na(ss)==FALSE) shipFit[[iShip]] <- try(normalmixEM(subset(tyvmr,VE_REF==iShip)$SI_SP[-idx],maxit=2000,k=ss,mean.constr=constraintmn,sd.constr=constraintsd,sigma=rep(1,length(constraintsd)))) - } else { shipFit[[iShip]] <- try(normalmixEM(subset(tyvmr,VE_REF==iShip)$SI_SP[-idx],maxit=2000,k=5,mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",sigma,"a","b"),sigma=rep(1,5)))} - } - } - if(plot==TRUE) plot(shipFit[[iShip]],2,breaks=100,xlim=c(-20,20)) - if(!class(shipFit[[iShip]]) == "try-error"){ - - #- Analyse the fit and turn it into a result of fishing - no fishing - #mu <- sort.int(shipFit[[iShip]]$mu,index.return=TRUE) - #sds <- shipFit[[iShip]]$sigma[mu$ix]; mu <- mu$x - mu <- shipFit[[iShip]]$mu - sds <- shipFit[[iShip]]$sigma - - probs <- dnorm(x=shipTacsat$SI_SP,mean=mu[ceiling(length(mu)/2)],sd=sds[ceiling(length(mu)/2)]) - for(i in (ceiling(length(mu)/2)+1):length(mu)) probs <- cbind(probs,dnorm(x=shipTacsat$SI_SP,mean=mu[i],sd=sds[i])) - SI_STATE <- apply(probs,1,which.max) - - if(length(mu)==3){ - SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("f","s"); SI_STATE <- ac(SI_STATE)} - if(length(mu)==5){ - SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("h","f","s"); 
SI_STATE <- ac(SI_STATE)} - tacsat$SI_STATE[which(tacsat$ID %in% shipTacsat$ID)] <- SI_STATE[1:(length(SI_STATE)/2)] - } - } - } - #------------------------------------------------------------------------- - #- Perform analyses for all vessels left over VE_REF + NOT ENOUGH # - #------------------------------------------------------------------------- - if(nrow(tnvmr)>40) - nonshipList <- names(which((rowSums(table(tnvmr$VE_REF,tnvmr$SI_SP)) - table(tnvmr$VE_REF,tnvmr$SI_SP)[,"0"])>20)) - nonshipFit <- list() - if(exists("nonshipList")){ - for(iShip in nonshipList){ - - #- Get rid of very influential datapoints (lower their abundance) - tbl <- table(subset(tnvmr,VE_REF==iShip)$SI_SP); - spd <- an(names(rev(sort(tbl))[1])) - idx <- which(subset(tnvmr,VE_REF==iShip)$SI_SP==spd) - nxt <- ifelse(names(rev(sort(tbl))[1])==ac(spd),ifelse(abs(an(names(rev(sort(tbl))[2])))==abs(spd),names(rev(sort(tbl))[3]),names(rev(sort(tbl))[2])),names(rev(sort(tbl))[1])) - if(tbl[ac(spd)]/tbl[nxt] > 5){ - idx <- sample(idx,tbl[ac(spd)]-tbl[nxt]*2,replace=FALSE) - if(length(which(abs(an(names(tbl))) %in% spd))>1) idx <- c(idx,sample(which(subset(tnvmr,VE_REF==iShip)$SI_SP==(-1*spd)),tbl[ac(-1*spd)]-tbl[nxt]*2,replace=FALSE)) - } else { idx <- -1:-nrow(subset(tnvmr,VE_REF==iShip))} - - shipTacsat <- subset(tnvmr,VE_REF == iShip) - #----------------------------------------------------------------------------- - # Fit the 3 or 5 normal distributions. 
If parameter guestimates are - # available, then use these - #----------------------------------------------------------------------------- - if(length(shipFit[[iShip]]$mu)==3){constraintmn <- c("-a",0,"a")} else { constraintmn <- c("-b","-a",0,"a","b")} - if(length(shipFit[[iShip]]$mu)==3){constraintsd <- c("a","b","a")}else { constraintsd <- c("b","a",0.911,"a","b")} - - #- Fit the actual model through the normalmixEM function - nonshipFit[[iShip]] <- try(normalmixEM(shipTacsat$SI_SP[-idx],k=length(shipFit[[iShip]]$mu),maxit=2000,mean.constr=constraintmn,sd.constr=constraintsd,sigma=rep(1,length(constraintsd)))) - - if(!class(nonshipFit[[iShip]]) == "try-error"){ - - #- Analyse the fit and turn it into a result of fishing - no fishing - #mu <- sort.int(nonshipFit[[iShip]]$mu,index.return=TRUE) - #sds <- nonshipFit[[iShip]]$sigma[mu$ix]; mu <- mu$x - mu <- nonshipFit[[iShip]]$mu - sds <- nonshipFit[[iShip]]$sigma - - probs <- dnorm(x=shipTacsat$SI_SP,mean=mu[ceiling(length(mu)/2)],sd=sds[ceiling(length(mu)/2)]) - for(i in (ceiling(length(mu)/2)+1):length(mu)) probs <- cbind(probs,dnorm(x=shipTacsat$SI_SP,mean=mu[i],sd=sds[i])) - SI_STATE <- apply(probs,1,which.max) - - if(length(mu)==3) - SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("f","s"); SI_STATE <- ac(SI_STATE) - if(length(mu)==5) - SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("h","f","s"); SI_STATE <- ac(SI_STATE) - tacsat$SI_STATE[which(tacsat$ID %in% shipTacsat$ID)] <- SI_STATE[1:(length(SI_STATE)/2)] - } - } - } - } - } - -leftOverTacsat <- tacsatOrig[which(!tacsatOrig$ID %in% tacsat$ID),] -tacsat <- rbind(tacsat,leftOverTacsat) -tacsat <- orderBy(~ID,tacsat) -cat("Note that in case of 5 peaks: no fishing = h, fishing = f, steaming / no fishing = s\n") -cat("Note that in case of 3 peaks: fishing = f, steaming / no fishing = s\n") -return(tacsat$SI_STATE)} \ No newline at end of file +#' Define activity of fishing vessel based on speed profile analyses. 
+#' +#' Analyse tacsat data by gear or vessel and define, based on speed profile and +#' fitting normal distributions through these speed profiles, fishing and other +#' activities. +#' +#' The analysis assumes that the speed profile close to 0 can be mirrored to +#' create a good normal distribution. Success of fit might depend on the +#' initial guess in peaks or mean peak values. +#' +#' Note that the default value of sigma0 = 0.911 (used when prior estimates of +#' parameters are not given). The value of 0.911 corresponds to a width of +#' 1.5knots on each side of the mean under a 90 percent CI. +#' +#' @param tacsat tacsat dataset +#' @param units Analyse by: "year", "month" and "week". "month" and "week" +#' cannot be used at same time. +#' @param analyse.by Analyse tacsat by gear ("LE_GEAR") or vessel ("VE_REF"). +#' @param storeScheme If \code{\link{activityTacsatAnalyse}} is used, supply +#' output here. +#' @param plot Logical. Whether the results of the fit of the normal +#' distributions should be plotted +#' @param level If analyse.by="LE_GEAR" is selected, there is an option to +#' analyse at vessel level too, but taking the gear parameter estimates as +#' starting values. level = "vessel" turns this option on. +#' @return In general, a 5-peak analysis results in h=no fishing / in harbour, +#' f=fishing, s=steaming. A 3-peak analyses results in f=fishing (closest to +#' zero speed) and s=steaming. +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{activityTacsatAnalyse}}, +#' \code{\link{segmentedTacsatSpeed}} +#' @examples +#' +#' data(tacsat) +#' data(eflalo) +#' +#' tacsatp <- mergeEflalo2Tacsat(eflalo,tacsat) +#' tacsatp$LE_GEAR <- eflalo$LE_GEAR[match(tacsatp$FT_REF,eflalo$FT_REF)] +#' +#' tacsatp$LE_GEAR <- ac(tacsatp$LE_GEAR) +#' tacsatp$LE_GEAR[which(is.na(tacsatp$LE_GEAR)==TRUE)] <- "NO_GEAR" +#' tacsat <- tacsatp +#' +#' #--------- LE_GEAR ----------- +#' #- Visual analyses of activity, and define number of peaks / kernels that can be +#' # observed (either 3 or 5) +#' \dontrun{ +#' storeScheme <- activityTacsatAnalyse(subset(tacsat,LE_GEAR == "OTM" & +#' format(SI_DATIM,"%Y") == 1801),units="year",analyse.by="LE_GEAR",identify="means") +#' res <- activityTacsat(subset(tacsat,LE_GEAR == "OTM" & +#' format(SI_DATIM,"%Y") == 1801),units="year",analyse.by="LE_GEAR", +#' storeScheme,plot=TRUE) +#' res <- activityTacsat(subset(tacsat,LE_GEAR == "OTM" & +#' format(SI_DATIM,"%Y") == 1801),units="year",analyse.by="LE_GEAR", +#' storeScheme,plot=TRUE,level="vessel") +#' +#' #--------- VE_REF ----------- +#' tacsat <- subset(tacsat,VE_REF == "801") +#' storeScheme <- activityTacsatAnalyse(tacsat,units="year",analyse.by="VE_REF", +#' identify="means") +#' +#' #- Run activityTacsat +#' res <- activityTacsat(tacsat,units="year",analyse.by="VE_REF",storeScheme, +#' plot=TRUE,level="all") +#' } +#' +#' @export activityTacsat +activityTacsat <- function(tacsat,units="year",analyse.by="LE_GEAR",storeScheme=NULL,plot=FALSE,level="all"){ + + require("mixtools") + if (!"SI_DATIM" %in% colnames(tacsat)) + tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, + sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M") + if(!analyse.by %in% c("LE_GEAR","VE_REF")) stop("Analysing only by gear or vessel") + + #- Make subset for only those tacsat records that have speed + tacsat$ID <- 1:nrow(tacsat) + tacsat$SI_STATE <- NA + tacsatOrig <- tacsat + idx <- 
which(is.na(tacsat$SI_SP)==FALSE) + tacsat <- tacsat[idx,] + + #- If sigma is NULL it needs to be estimated and gets a variable name + storeScheme$sigma0[which(storeScheme$sigma0==0)] <- "d" + + if(units == "all"){ yrs <- 0; mths <- 0; wks <- 0} + if(units == "year"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); mths <- 0; wks <- 0} + if(units == "month"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); mths <- an(sort(unique(format(tacsat$SI_DATIM,"%m")))); wks <- 0} + if(units == "week"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); wks <- an(sort(unique(format(tacsat$SI_DATIM,"%W"))))+1; mths <- 0} + + runScheme <- expand.grid(years=yrs,months=mths,weeks=wks) + + #----------------------------------------------------------------------------- + # Start run for all combinations of gear / vessel and units + #----------------------------------------------------------------------------- + + for(iRun in 1:nrow(runScheme)){ + yr <- runScheme[iRun,"years"] + mth <- runScheme[iRun,"months"] + wk <- runScheme[iRun,"weeks"] + if(nrow(runScheme)==1){ sTacsat <- tacsat + } else { + if(mth == 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr) + if(mth == 0 & wk != 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & (an(format( tacsat$SI_DATIM,"%W"))+1) == wk) + if(mth != 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & format(tacsat$SI_DATIM,"%m") == mth) + } + if(plot==TRUE) x11(); + #--------------------------------------------------------------------------- + #- Analyses when gear info is supplied LE_GEAR + #--------------------------------------------------------------------------- + if("LE_GEAR" %in% colnames(tacsat) & analyse.by == "LE_GEAR"){ + + gearList <- names(which((rowSums(table(sTacsat$LE_GEAR,sTacsat$SI_SP)) - table(sTacsat$LE_GEAR,sTacsat$SI_SP)[,"0"])>40)) + + #- Mirror the tacsat dataset and make a selection + tyg <- subset(sTacsat,is.na(LE_GEAR) == FALSE & LE_GEAR %in% gearList); 
tygmr <- tyg; tygmr$SI_SP <- -1* tygmr$SI_SP; tygmr <- rbind(tyg,tygmr) + tng <- subset(sTacsat,is.na(LE_GEAR) == TRUE | !LE_GEAR %in% gearList); tngmr <- tng; tngmr$SI_SP <- -1* tngmr$SI_SP; tngmr <- rbind(tng,tngmr) + + #------------------------------------------------------------------------- + #- Get general speed pattern by gear, use analysed number of kernals LE_GEAR + GENERIC + #------------------------------------------------------------------------- + res <- list() + for(iGr in unique(tyg$LE_GEAR)){ + + #- Get rid of very influential datapoints (lower their abundance) + tbl <- table(subset(tygmr,LE_GEAR==iGr)$SI_SP); + spd <- an(names(rev(sort(tbl))[1])) + idx <- which(subset(tygmr,LE_GEAR==iGr)$SI_SP==spd) + nxt <- ifelse(names(rev(sort(tbl))[1])==ac(spd),ifelse(abs(an(names(rev(sort(tbl))[2])))==abs(spd),names(rev(sort(tbl))[3]),names(rev(sort(tbl))[2])),names(rev(sort(tbl))[1])) + if(tbl[ac(spd)]/tbl[nxt] > 5){ + idx <- sample(idx,tbl[ac(spd)]-tbl[nxt]*2,replace=FALSE) + if(length(which(abs(an(names(tbl))) %in% spd))>1) idx <- c(idx,sample(which(subset(tygmr,LE_GEAR==iGr)$SI_SP==(-1*spd)),tbl[ac(-1*spd)]-tbl[nxt]*2,replace=FALSE)) + } else { idx <- -1:-nrow(subset(tygmr,LE_GEAR==iGr))} + + #----------------------------------------------------------------------------- + # Fit the 3 or 5 normal distributions. 
If parameter guestimates are + # available, then use these + #----------------------------------------------------------------------------- + if(is.null(storeScheme)==TRUE){ + res[[iGr]] <- try(normalmixEM(subset(tygmr,LE_GEAR==iGr)$SI_SP[-idx],maxit=1000,k=5,maxrestarts=20,mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",0.911,"a","b"),sigma=rep(1,5))) + } else { + #- Fitting model when mean values of peaks has been defined + if("means" %in% colnames(storeScheme)){ + + #- Extract parameters from storeScheme + ss <- storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by == iGr),"means"] + sigma <- anf(storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by == iGr),"sigma0"]) + fixPeaks <- ac( storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by == iGr),"fixPeaks"]) + + #- Setup parameter estimate vectors for mu and sigma + if(length(c(na.omit(as.numeric(strsplit(ss," ")[[1]]))))==3){ constraintmn <- c("-a",0,"a") } else { constraintmn <- c("-b","-a",0,"a","b")} + if(length(c(na.omit(as.numeric(strsplit(ss," ")[[1]]))))==3){ constraintsd <- c("a","b","a")} else { constraintsd <- c("b","a",sigma,"a","b")} + if(fixPeaks) constraintmn <- c(na.omit(anf(unlist(strsplit(ss," "))))) + + #- Fit the actual model through the normalmixEM function + res[[iGr]] <- try(normalmixEM(subset(tygmr,LE_GEAR==iGr)$SI_SP[-idx],maxit=1000,mu=c(na.omit(as.numeric(strsplit(ss," ")[[1]]))), sigma=rep(1,length(constraintsd)), + maxrestarts=20,mean.constr=constraintmn,sd.constr=constraintsd)) + } else { + #- Fitting model when number of peaks has been defined + + #- Extract parameters from storeScheme + ss <- storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by == iGr),"peaks"] + sigma <- 
anf(storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by == iGr),"sigma0"]) + fixPeaks <- ac( storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by == iGr),"fixPeaks"]) + + #- Setup parameter estimate vectors for mu and sigma + if(ss==3){ constraintmn <- c("-a",0,"a") } else { constraintmn <- c("-b","-a",0,"a","b")} + if(ss==3){ constraintsd <- c("a","b","a")} else { constraintsd <- c("b","a",sigma,"a","b")} + if(length(ss)>0){ + + #- Fit the actual model through the normalmixEM function + if(is.na(ss)==TRUE) res[[iGr]] <- try(normalmixEM(subset(tygmr,LE_GEAR==iGr)$SI_SP[-idx],maxit=1000,k=5, maxrestarts=20, mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",sigma,"a","b"),sigma=rep(1,5))) + if(is.na(ss)==FALSE) res[[iGr]] <- try(normalmixEM(subset(tygmr,LE_GEAR==iGr)$SI_SP[-idx],maxit=1000,k=ss,maxrestarts=20, mean.constr=constraintmn, sd.constr=constraintsd, sigma=rep(1,length(constraintsd)))) + } else { res[[iGr]] <- try(normalmixEM(subset(tygmr,LE_GEAR==iGr)$SI_SP[-idx],maxit=1000,k=5, maxrestarts=20, mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",sigma,"a","b"),sigma=rep(1,5)))} + } + } + if(plot==TRUE) plot(res[[iGr]],2,breaks=100,xlim=c(-20,20)) + } + if(level == "vessel"){ + #- Transform the output into the right format + for(iGr in unique(tyg$LE_GEAR)) + if(!class(res[[iGr]]) == "try-error"){ res[[iGr]] <- res[[iGr]]$mu } else { res[[iGr]] <- rep(NA,5)} + res <- lapply(res,function(x){if(class(x)=="try-error"){x<-rep(NA,5)}else{x}}) + res <- lapply(res,sort) + } + + if(level == "vessel"){ + #------------------------------------------------------------------------- + #- Perform analyses per vessel with gear LE_GEAR + VE_REF + #------------------------------------------------------------------------- + + if(nrow(tygmr)>40) + shipList <- names(which((rowSums(table(tygmr$VE_REF,tygmr$SI_SP)) - 
table(tygmr$VE_REF,tygmr$SI_SP)[,"0"])>20)) + shipFit <- list() + if(exists("shipList")){ + for(iShip in shipList){ + + #- Get rid of very influential datapoints (lower their abundance) + tbl <- table(subset(tygmr,VE_REF==iShip)$SI_SP); + spd <- an(names(rev(sort(tbl))[1])) + idx <- which(subset(tygmr,VE_REF==iShip)$SI_SP==spd) + nxt <- ifelse(names(rev(sort(tbl))[1])==ac(spd),ifelse(abs(an(names(rev(sort(tbl))[2])))==abs(spd),names(rev(sort(tbl))[3]),names(rev(sort(tbl))[2])),names(rev(sort(tbl))[1])) + if(tbl[ac(spd)]/tbl[nxt] > 5){ + idx <- sample(idx,tbl[ac(spd)]-tbl[nxt]*2,replace=FALSE) + if(length(which(abs(an(names(tbl))) %in% spd))>1) idx <- c(idx,sample(which(subset(tygmr,VE_REF==iShip)$SI_SP==(-1*spd)),tbl[ac(-1*spd)]-tbl[nxt]*2,replace=FALSE)) + } else { idx <- -1:-nrow(subset(tygmr,VE_REF==iShip))} + + shipTacsat <- subset(tygmr,VE_REF == iShip) + + #----------------------------------------------------------------------------- + # Fit the 3 or 5 normal distributions. If parameter guestimates are + # available, then use these + #----------------------------------------------------------------------------- + + #- Setup parameter estimate vectors for mu and sigma + if(length(res[[names(which.max(table(shipTacsat$LE_GEAR)))]])==3){ constraintmn <- c("-a",0,"a")} else { constraintmn <- c("-b","-a",0,"a","b")} + if(length(res[[names(which.max(table(shipTacsat$LE_GEAR)))]])==3){ constraintsd <- c("a","b","a")}else { constraintsd <- c("b","a",0.911,"a","b")} + + #- Fit the actual model through the normalmixEM function + shipFit[[iShip]] <- try(normalmixEM(shipTacsat$SI_SP[-idx],mu=res[[names(which.max(table(shipTacsat$LE_GEAR)))]],maxit=2000, + sigma=rep(1,length(constraintsd)),mean.constr=constraintmn,sd.constr=constraintsd)) + + if(class(shipFit[[iShip]])!= "try-error"){ + + #- Analyse the fit and turn it into a result of fishing - no fishing + #mu <- sort.int(shipFit[[iShip]]$mu,index.return=TRUE) + #sds <- shipFit[[iShip]]$sigma[mu$ix]; mu <- mu$x + mu <- 
shipFit[[iShip]]$mu + sds <- shipFit[[iShip]]$sigma + + probs <- dnorm(x=shipTacsat$SI_SP,mean=mu[ceiling(length(mu)/2)],sd=sds[ceiling(length(mu)/2)]) + for(i in (ceiling(length(mu)/2)+1):length(mu)) probs <- cbind(probs,dnorm(x=shipTacsat$SI_SP,mean=mu[i],sd=sds[i])) + SI_STATE <- apply(probs,1,which.max) + + if(length(mu)==3){ + SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("f","s"); SI_STATE <- ac(SI_STATE)} + if(length(mu)==5){ + SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("h","f","s"); SI_STATE <- ac(SI_STATE)} + tacsat$SI_STATE[which(tacsat$ID %in% shipTacsat$ID)] <- SI_STATE[1:(length(SI_STATE)/2)] + } else { tacsat$SI_STATE[which(tacsat$ID %in% shipTacsat$ID)] <- NA} + } + } + } else { + for(iGr in unique(tyg$LE_GEAR)){ + if(!class(res[[iGr]]) == "try-error"){ + + #- Analyse the fit and turn it into a result of fishing - no fishing + #mu <- sort.int(res[[iGr]]$mu,index.return=TRUE) + #sds <- res[[iGr]]$sigma[mu$ix]; mu <- mu$x + mu <- res[[iGr]]$mu + sds <- res[[iGr]]$sigma + probs <- dnorm(x=subset(tyg,LE_GEAR==iGr)$SI_SP,mean=mu[ceiling(length(mu)/2)],sd=sds[ceiling(length(mu)/2)]) + for(i in (ceiling(length(mu)/2)+1):length(mu)) probs <- cbind(probs,dnorm(x=subset(tyg,LE_GEAR==iGr)$SI_SP,mean=mu[i],sd=sds[i])) + SI_STATE <- apply(probs,1,which.max) + + if(length(mu)==3){ + SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("f","s"); SI_STATE <- ac(SI_STATE)} + if(length(mu)==5){ + SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("h","f","s"); SI_STATE <- ac(SI_STATE)} + tacsat$SI_STATE[which(tacsat$ID %in% subset(tyg,LE_GEAR == iGr)$ID)] <- SI_STATE + } + } + } + #------------------------------------------------------------------------- + #- Perform analyses per vessel without gear NO_GEAR + VE_REF + #------------------------------------------------------------------------- + if(nrow(tngmr)>40) + nonshipList <- names(which((rowSums(table(tngmr$VE_REF,tngmr$SI_SP)) - table(tngmr$VE_REF,tngmr$SI_SP)[,"0"])>20)) + nonshipFit <- list() + 
if(exists("nonshipList")){ + for(iShip in nonshipList){ + + #- Get rid of very influential datapoints (lower their abundance) + tbl <- table(subset(tngmr,VE_REF==iShip)$SI_SP); + spd <- an(names(rev(sort(tbl))[1])) + idx <- which(subset(tngmr,VE_REF==iShip)$SI_SP==spd) + nxt <- ifelse(names(rev(sort(tbl))[1])==ac(spd),ifelse(abs(an(names(rev(sort(tbl))[2])))==abs(spd),names(rev(sort(tbl))[3]),names(rev(sort(tbl))[2])),names(rev(sort(tbl))[1])) + if(tbl[ac(spd)]/tbl[nxt] > 5){ + idx <- sample(idx,tbl[ac(spd)]-tbl[nxt]*2,replace=FALSE) + if(length(which(abs(an(names(tbl))) %in% spd))>1) idx <- c(idx,sample(which(subset(tngmr,VE_REF==iShip)$SI_SP==(-1*spd)),tbl[ac(-1*spd)]-tbl[nxt]*2,replace=FALSE)) + } else { idx <- -1:-nrow(subset(tngmr,VE_REF==iShip))} + + #----------------------------------------------------------------------------- + # Fit the 3 or 5 normal distributions. If parameter guestimates are + # available, then use these + #----------------------------------------------------------------------------- + + shipTacsat <- subset(tngmr,VE_REF == iShip) + if(exists("shipFit")){ + if(iShip %in% names(shipFit)){ + + #- Setup parameter estimate vectors for mu and sigma + if(length(shipFit[[iShip]]$mu)==3){constraintmn <- c("-a",0,"a")} else { constraintmn <- c("-b","-a",0,"a","b")} + if(length(shipFit[[iShip]]$mu)==3){constraintsd <- c("a","b","a")}else { constraintsd <- c("b","a",0.911,"a","b")} + + #- Fit the actual model through the normalmixEM function + nonshipFit[[iShip]] <- try(normalmixEM(shipTacsat$SI_SP[-idx],k=length(shipFit[[iShip]]$mu),maxit=2000, + sigma=rep(1,length(constraintsd)),mean.constr=constraintmn,sd.constr=constraintsd)) + } else { + #- Fit the actual model through the normalmixEM function + nonshipFit[[iShip]] <- try(normalmixEM(shipTacsat$SI_SP[-idx],k=5,maxit=2000,mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",0.911,"a","b"),sigma=rep(1,5)))} + + if(!class(nonshipFit[[iShip]]) == "try-error"){ + + #- Analyse the fit and turn it 
into a result of fishing - no fishing + #mu <- sort.int(nonshipFit[[iShip]]$mu,index.return=TRUE) + #sds <- nonshipFit[[iShip]]$sigma[mu$ix]; mu <- mu$x + mu <- nonshipFit[[iShip]]$mu + sds <- nonshipFit[[iShip]]$sds + + probs <- dnorm(x=shipTacsat$SI_SP,mean=mu[ceiling(length(mu)/2)],sd=sds[ceiling(length(mu)/2)]) + for(i in (ceiling(length(mu)/2)+1):length(mu)) probs <- cbind(probs,dnorm(x=shipTacsat$SI_SP,mean=mu[i],sd=sds[i])) + SI_STATE <- apply(probs,1,which.max) + + if(length(mu)==3){ + SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("f","s"); SI_STATE <- ac(SI_STATE)} + if(length(mu)==5){ + SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("h","f","s"); SI_STATE <- ac(SI_STATE)} + tacsat$SI_STATE[which(tacsat$ID %in% shipTacsat$ID)] <- SI_STATE[1:(length(SI_STATE)/2)] + } + } + } + } + } + #--------------------------------------------------------------------------- + #- Analyses when vessel info is supplied VE_REF + ENOUGH # + #--------------------------------------------------------------------------- + if("VE_REF" %in% colnames(tacsat) & analyse.by == "VE_REF"){ + + vesselList <- names(which((rowSums(table(sTacsat$VE_REF,sTacsat$SI_SP)) - table(sTacsat$VE_REF,sTacsat$SI_SP)[,"0"])>40)) + + #- Mirror the tacsat dataset and make a selection + tyv <- subset(sTacsat,is.na(VE_REF) == FALSE & VE_REF %in% vesselList); tyvmr <- tyv; tyvmr$SI_SP <- -1* tyvmr$SI_SP; tyvmr <- rbind(tyv,tyvmr) + tnv <- subset(sTacsat,is.na(VE_REF) == TRUE | !VE_REF %in% vesselList); tnvmr <- tnv; tnvmr$SI_SP <- -1* tnvmr$SI_SP; tnvmr <- rbind(tnv,tnvmr) + + #- Perform analyses per vessel + if(nrow(tyv)>40) + shipList <- names(which((rowSums(table(tyvmr$VE_REF,tyvmr$SI_SP)) - table(tyvmr$VE_REF,tyvmr$SI_SP)[,"0"])>20)) + shipFit <- list() + if(exists("shipList")){ + for(iShip in shipList){ + + #- Get rid of very influential data points + tbl <- table(subset(tyvmr,VE_REF==iShip)$SI_SP); + spd <- an(names(rev(sort(tbl))[1])) + idx <- which(subset(tyvmr,VE_REF==iShip)$SI_SP==spd) + nxt <- 
ifelse(names(rev(sort(tbl))[1])==ac(spd),ifelse(abs(an(names(rev(sort(tbl))[2])))==abs(spd),names(rev(sort(tbl))[3]),names(rev(sort(tbl))[2])),names(rev(sort(tbl))[1])) + if(tbl[ac(spd)]/tbl[nxt] > 5){ + idx <- sample(idx,tbl[ac(spd)]-tbl[nxt]*2,replace=FALSE) + if(length(which(abs(an(names(tbl))) %in% spd))>1) idx <- c(idx,sample(which(subset(tyvmr,VE_REF==iShip)$SI_SP==(-1*spd)),tbl[ac(-1*spd)]-tbl[nxt]*2,replace=FALSE)) + } else { idx <- -1:-nrow(subset(tyvmr,VE_REF==iShip))} + + shipTacsat <- subset(tyvmr,VE_REF == iShip) + + #----------------------------------------------------------------------------- + # Fit the 3 or 5 normal distributions. If parameter guestimates are + # available, then use these + #----------------------------------------------------------------------------- + if(is.null(storeScheme)==TRUE){ + shipFit[[iShip]] <- try(normalmixEM(subset(tyvmr,VE_REF==iShip)$SI_SP[-idx],maxit=2000,k=5,mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",0.911,"a","b"),sigma=rep(1,5))) + } else { + #- Fitting model when mean values of peaks has been defined + if("means" %in% colnames(storeScheme)){ + + #- Extract parameters from storeScheme + ss <- storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by == iShip),"means"] + sigma <- anf(storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by == iShip),"sigma0"]) + fixPeaks <- ac( storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by == iShip),"fixPeaks"]) + + #- Setup parameter estimate vectors for mu and sigma + if(length(c(na.omit(as.numeric(strsplit(ss," ")[[1]]))))==3){ constraintmn <- c("-a",0,"a")} else { constraintmn <- c("-b","-a",0,"a","b")} + if(length(c(na.omit(as.numeric(strsplit(ss," ")[[1]]))))==3){ constraintsd <- c("a","b","a")}else { constraintsd <- c("b","a",sigma,"a","b")} + if(fixPeaks) 
constraintmn <- c(na.omit(anf(unlist(strsplit(ss," "))))) + + #- Fit the actual model through the normalmixEM function + shipFit[[iShip]] <- try(normalmixEM(subset(tyvmr,VE_REF==iShip)$SI_SP[-idx],mu=c(na.omit(as.numeric(strsplit(ss," ")[[1]]))), maxit=2000, + mean.constr=constraintmn,sd.constr=constraintsd,sigma=rep(1,length(constraintsd)))) + } else { + #- Fitting model when number of peaks has been defined + + #- Extract parameters from storeScheme + ss <- storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by == iShip),"peaks"] + sigma <- anf(storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by == iShip),"sigma0"]) + fixPeaks <- ac( storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by == iShip),"fixPeaks"]) + + #- Setup parameter estimate vectors for mu and sigma + if(ss==3){ constraintmn <- c("-a",0,"a")} else { constraintmn <- c("-b","-a",0,"a","b")} + if(ss==3){ constraintsd <- c("a","b","a")}else { constraintsd <- c("b","a",sigma,"a","b")} + if(length(ss)>0){ + + #- Fit the actual model through the normalmixEM function + if(is.na(ss)==TRUE) shipFit[[iShip]] <- try(normalmixEM(subset(tyvmr,VE_REF==iShip)$SI_SP[-idx],maxit=2000,k=5,mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",sigma,"a","b"),sigma=rep(1,5))) + if(is.na(ss)==FALSE) shipFit[[iShip]] <- try(normalmixEM(subset(tyvmr,VE_REF==iShip)$SI_SP[-idx],maxit=2000,k=ss,mean.constr=constraintmn,sd.constr=constraintsd,sigma=rep(1,length(constraintsd)))) + } else { shipFit[[iShip]] <- try(normalmixEM(subset(tyvmr,VE_REF==iShip)$SI_SP[-idx],maxit=2000,k=5,mean.constr=c("-b","-a",0,"a","b"),sd.constr=c("b","a",sigma,"a","b"),sigma=rep(1,5)))} + } + } + if(plot==TRUE) plot(shipFit[[iShip]],2,breaks=100,xlim=c(-20,20)) + if(!class(shipFit[[iShip]]) == "try-error"){ + + #- Analyse the fit and turn it 
into a result of fishing - no fishing + #mu <- sort.int(shipFit[[iShip]]$mu,index.return=TRUE) + #sds <- shipFit[[iShip]]$sigma[mu$ix]; mu <- mu$x + mu <- shipFit[[iShip]]$mu + sds <- shipFit[[iShip]]$sigma + + probs <- dnorm(x=shipTacsat$SI_SP,mean=mu[ceiling(length(mu)/2)],sd=sds[ceiling(length(mu)/2)]) + for(i in (ceiling(length(mu)/2)+1):length(mu)) probs <- cbind(probs,dnorm(x=shipTacsat$SI_SP,mean=mu[i],sd=sds[i])) + SI_STATE <- apply(probs,1,which.max) + + if(length(mu)==3){ + SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("f","s"); SI_STATE <- ac(SI_STATE)} + if(length(mu)==5){ + SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("h","f","s"); SI_STATE <- ac(SI_STATE)} + tacsat$SI_STATE[which(tacsat$ID %in% shipTacsat$ID)] <- SI_STATE[1:(length(SI_STATE)/2)] + } + } + } + #------------------------------------------------------------------------- + #- Perform analyses for all vessels left over VE_REF + NOT ENOUGH # + #------------------------------------------------------------------------- + if(nrow(tnvmr)>40) + nonshipList <- names(which((rowSums(table(tnvmr$VE_REF,tnvmr$SI_SP)) - table(tnvmr$VE_REF,tnvmr$SI_SP)[,"0"])>20)) + nonshipFit <- list() + if(exists("nonshipList")){ + for(iShip in nonshipList){ + + #- Get rid of very influential datapoints (lower their abundance) + tbl <- table(subset(tnvmr,VE_REF==iShip)$SI_SP); + spd <- an(names(rev(sort(tbl))[1])) + idx <- which(subset(tnvmr,VE_REF==iShip)$SI_SP==spd) + nxt <- ifelse(names(rev(sort(tbl))[1])==ac(spd),ifelse(abs(an(names(rev(sort(tbl))[2])))==abs(spd),names(rev(sort(tbl))[3]),names(rev(sort(tbl))[2])),names(rev(sort(tbl))[1])) + if(tbl[ac(spd)]/tbl[nxt] > 5){ + idx <- sample(idx,tbl[ac(spd)]-tbl[nxt]*2,replace=FALSE) + if(length(which(abs(an(names(tbl))) %in% spd))>1) idx <- c(idx,sample(which(subset(tnvmr,VE_REF==iShip)$SI_SP==(-1*spd)),tbl[ac(-1*spd)]-tbl[nxt]*2,replace=FALSE)) + } else { idx <- -1:-nrow(subset(tnvmr,VE_REF==iShip))} + + shipTacsat <- subset(tnvmr,VE_REF == iShip) + 
#----------------------------------------------------------------------------- + # Fit the 3 or 5 normal distributions. If parameter guestimates are + # available, then use these + #----------------------------------------------------------------------------- + if(length(shipFit[[iShip]]$mu)==3){constraintmn <- c("-a",0,"a")} else { constraintmn <- c("-b","-a",0,"a","b")} + if(length(shipFit[[iShip]]$mu)==3){constraintsd <- c("a","b","a")}else { constraintsd <- c("b","a",0.911,"a","b")} + + #- Fit the actual model through the normalmixEM function + nonshipFit[[iShip]] <- try(normalmixEM(shipTacsat$SI_SP[-idx],k=length(shipFit[[iShip]]$mu),maxit=2000,mean.constr=constraintmn,sd.constr=constraintsd,sigma=rep(1,length(constraintsd)))) + + if(!class(nonshipFit[[iShip]]) == "try-error"){ + + #- Analyse the fit and turn it into a result of fishing - no fishing + #mu <- sort.int(nonshipFit[[iShip]]$mu,index.return=TRUE) + #sds <- nonshipFit[[iShip]]$sigma[mu$ix]; mu <- mu$x + mu <- nonshipFit[[iShip]]$mu + sds <- nonshipFit[[iShip]]$sigma + + probs <- dnorm(x=shipTacsat$SI_SP,mean=mu[ceiling(length(mu)/2)],sd=sds[ceiling(length(mu)/2)]) + for(i in (ceiling(length(mu)/2)+1):length(mu)) probs <- cbind(probs,dnorm(x=shipTacsat$SI_SP,mean=mu[i],sd=sds[i])) + SI_STATE <- apply(probs,1,which.max) + + if(length(mu)==3) + SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("f","s"); SI_STATE <- ac(SI_STATE) + if(length(mu)==5) + SI_STATE <- af(SI_STATE); levels(SI_STATE) <- c("h","f","s"); SI_STATE <- ac(SI_STATE) + tacsat$SI_STATE[which(tacsat$ID %in% shipTacsat$ID)] <- SI_STATE[1:(length(SI_STATE)/2)] + } + } + } + } + } + +leftOverTacsat <- tacsatOrig[which(!tacsatOrig$ID %in% tacsat$ID),] +tacsat <- rbind(tacsat,leftOverTacsat) +tacsat <- orderBy(~ID,tacsat) +cat("Note that in case of 5 peaks: no fishing = h, fishing = f, steaming / no fishing = s\n") +cat("Note that in case of 3 peaks: fishing = f, steaming / no fishing = s\n") +return(tacsat$SI_STATE)} diff --git 
a/vmstools/R/activityTacsatAnalyse.r b/vmstools/R/activityTacsatAnalyse.r index 59d3c6c..48215ff 100644 --- a/vmstools/R/activityTacsatAnalyse.r +++ b/vmstools/R/activityTacsatAnalyse.r @@ -1,52 +1,106 @@ -activityTacsatAnalyse <- function(tacsat,units="year",analyse.by="LE_GEAR",identify="peaks"){ - - if(!"LE_GEAR" %in% colnames(tacsat)) stop("Provide gear type (as column 'LE_GEAR' and if unknown, provide it as 'MIS'") - if(!analyse.by %in% c("LE_GEAR","VE_REF")) warning("Analysing by unknown column variable, please check!") - if(analyse.by %in% colnames(tacsat)){ - if(units == "all"){ yrs <- 0; mths <- 0; wks <- 0} - if(units == "year"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); mths <- 0; wks <- 0} - if(units == "month"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); mths <- sort(unique(month(tacsat$SI_DATIM))); wks <- 0} - if(units == "week"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); wks <- sort(unique(week(tacsat$SI_DATIM))); mths <- 0} - - runScheme <- expand.grid(years=yrs,months=mths,weeks=wks,stringsAsFactors=FALSE) - storeScheme <- expand.grid(years=yrs,months=mths,weeks=wks,analyse.by=unique(tacsat[,analyse.by]),stringsAsFactors=FALSE) - storeScheme$peaks <- NA - storeScheme$fixPeaks <- FALSE - storeScheme$sigma0 <- 0.911 - if(identify == "means") storeScheme$means <- NA - for(iRun in 1:nrow(runScheme)){ - yr <- runScheme[iRun,"years"] - mth <- runScheme[iRun,"months"] - wk <- runScheme[iRun,"weeks"] - if(nrow(runScheme)==1){ sTacsat <- tacsat - } else { - if(mth == 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr) - if(mth == 0 & wk != 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & week( tacsat$SI_DATIM) == wk) - if(mth != 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & month(tacsat$SI_DATIM) == mth) - } - - for(iBy in na.omit(unique(sTacsat[,analyse.by]))){ - dat <- subset(sTacsat,sTacsat[,analyse.by] == iBy) - datmr <- dat; datmr$SI_SP <- -1*dat$SI_SP; datmr 
<- rbind(dat,datmr) - xrange <- pmin(20,range(datmr$SI_SP),na.rm=TRUE); datmr$SI_SP[which(abs(datmr$SI_SP) >20)] <- NA - hist(datmr$SI_SP,breaks=seq(-20,20,0.5),main=paste(iBy,ifelse(yr>0,yr,""),ifelse(mth>0,mth,""),ifelse(wk>0,wk,"")),xaxt="n") - axis(1,at=seq(-20,20,1)) - - require(tcltk) - - pks <- callNumberPeak() - storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by== iBy),"peaks"] <- pks - - if(identify=="means"){ - valPeaks <- callPeakValue(pks) - if(substr(valPeaks,1,1)==" ") - valPeaks <- substr(valPeaks,2,nchar(valPeaks)) - storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & - storeScheme$weeks == wk & storeScheme$analyse.by== iBy),"means"] <- valPeaks - - } - } - } - } else { stop("analyse.by statement not found as a column in the specified tacsat dataset")} -return(storeScheme)} \ No newline at end of file +#' Analyse speed profiles to define number of peaks. +#' +#' Analyse histrogram of speed profiles by gear or vessel to define number of +#' peaks +#' +#' The analyses assumes that the speed profile close to 0 can be mirrored to +#' create a good normal distribution. The left-hand side of the histogram is +#' identical to the right-hand side and therefore potential identified peaks +#' should be mirrored as well. The detailed 'means' approach does take more +#' time to identify but is more succesful in fitting distributions to the speed +#' profile. It pays off to specify mean values of the peaks by 0.5knots +#' precision. +#' +#' Additional Parameters: Sigma0: Fixed sigma value for distribution around +#' zero. Zero (0) if parameter needs to be estimated. FixPeaks: Option to fix +#' the top of the density curves at the initial parameter values from +#' 'storeScheme'. +#' +#' @param tacsat tacsat dataset +#' @param units Analyse by: "year", "month" and "week". 
"month" and "week" +#' cannot be used at same time +#' @param analyse.by Analyse tacsat by gear ("LE_GEAR") or vessel ("VE_REF") +#' @param identify Identify either the number of peaks ("peaks", rough +#' approach) or mean speed associated with peaks ("means", detailed approach) +#' @return Returns a \code{\link{data.frame}} with a run scheme that can be +#' used as input to \code{\link{activityTacsat}} +#' @author Niels T. Hintzen +#' @seealso \code{\link{activityTacsat}}, \code{\link{segmentedTacsatSpeed}} +#' @examples +#' +#' data(tacsat) +#' data(eflalo) +#' +#' tacsatp <- mergeEflalo2Tacsat(eflalo,tacsat) +#' tacsatp$LE_GEAR <- eflalo$LE_GEAR[match(tacsatp$FT_REF,eflalo$FT_REF)] +#' +#' tacsatp$LE_GEAR <- ac(tacsatp$LE_GEAR) +#' tacsatp$LE_GEAR[which(is.na(tacsatp$LE_GEAR)==TRUE)] <- "NO_GEAR" +#' tacsat <- tacsatp +#' +#' \dontrun{ +#' #--------- LE_GEAR ----------- +#' #- Visual analyses of activity, and define number of peaks / kernels that can +#' # be observed (either 3 or 5) +#' storeScheme <- activityTacsatAnalyse(subset(tacsat,LE_GEAR == "OTM" & +#' year(SI_DATIM) == 1801),units="year",analyse.by="LE_GEAR", +#' identify="peaks") +#' storeScheme <- activityTacsatAnalyse(subset(tacsat,LE_GEAR == "OTM" & +#' year(SI_DATIM) == 1801),units="year",analyse.by="LE_GEAR", +#' identify="means") +#' +#' } +#' +#' @export activityTacsatAnalyse +activityTacsatAnalyse <- function(tacsat,units="year",analyse.by="LE_GEAR",identify="peaks"){ + + if(!"LE_GEAR" %in% colnames(tacsat)) stop("Provide gear type (as column 'LE_GEAR' and if unknown, provide it as 'MIS'") + if(!analyse.by %in% c("LE_GEAR","VE_REF")) warning("Analysing by unknown column variable, please check!") + if(analyse.by %in% colnames(tacsat)){ + if(units == "all"){ yrs <- 0; mths <- 0; wks <- 0} + if(units == "year"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); mths <- 0; wks <- 0} + if(units == "month"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); mths <- 
sort(unique(month(tacsat$SI_DATIM))); wks <- 0} + if(units == "week"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); wks <- sort(unique(week(tacsat$SI_DATIM))); mths <- 0} + + runScheme <- expand.grid(years=yrs,months=mths,weeks=wks,stringsAsFactors=FALSE) + storeScheme <- expand.grid(years=yrs,months=mths,weeks=wks,analyse.by=unique(tacsat[,analyse.by]),stringsAsFactors=FALSE) + storeScheme$peaks <- NA + storeScheme$fixPeaks <- FALSE + storeScheme$sigma0 <- 0.911 + if(identify == "means") storeScheme$means <- NA + for(iRun in 1:nrow(runScheme)){ + yr <- runScheme[iRun,"years"] + mth <- runScheme[iRun,"months"] + wk <- runScheme[iRun,"weeks"] + if(nrow(runScheme)==1){ sTacsat <- tacsat + } else { + if(mth == 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr) + if(mth == 0 & wk != 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & week( tacsat$SI_DATIM) == wk) + if(mth != 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & month(tacsat$SI_DATIM) == mth) + } + + for(iBy in na.omit(unique(sTacsat[,analyse.by]))){ + dat <- subset(sTacsat,sTacsat[,analyse.by] == iBy) + datmr <- dat; datmr$SI_SP <- -1*dat$SI_SP; datmr <- rbind(dat,datmr) + xrange <- pmin(20,range(datmr$SI_SP),na.rm=TRUE); datmr$SI_SP[which(abs(datmr$SI_SP) >20)] <- NA + hist(datmr$SI_SP,breaks=seq(-20,20,0.5),main=paste(iBy,ifelse(yr>0,yr,""),ifelse(mth>0,mth,""),ifelse(wk>0,wk,"")),xaxt="n") + axis(1,at=seq(-20,20,1)) + + require(tcltk) + + pks <- callNumberPeak() + storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by== iBy),"peaks"] <- pks + + if(identify=="means"){ + valPeaks <- callPeakValue(pks) + if(substr(valPeaks,1,1)==" ") + valPeaks <- substr(valPeaks,2,nchar(valPeaks)) + storeScheme[which(storeScheme$years == yr & storeScheme$months == mth & + storeScheme$weeks == wk & storeScheme$analyse.by== iBy),"means"] <- valPeaks + + } + } + } + } else { stop("analyse.by 
statement not found as a column in the specified tacsat dataset")} +return(storeScheme)} diff --git a/vmstools/R/addWidth.r b/vmstools/R/addWidth.r index 1f3e85b..6d4f5e7 100644 --- a/vmstools/R/addWidth.r +++ b/vmstools/R/addWidth.r @@ -1,34 +1,71 @@ -addWidth <- function(interpolation,gearWidth){ - - -allPolygons <- list() -counter <- 0 -for(i in 1:length(interpolation)){ - - #Take the interpolated points - xs <- interpolation[[i]][-1,1] - ys <- interpolation[[i]][-1,2] - - if(all(is.na(xs))==F & all(is.na(ys))==FALSE){ - counter <- counter + 1 - #Calculate the bearing towards and away from each point - bear1 <- bearing(xs[1:(length(xs)-2)],ys[1:(length(xs)-2)],xs[2:(length(xs)-1)],ys[2:(length(xs)-1)]) - bear2 <- bearing(xs[2:(length(xs)-1)],ys[2:(length(xs)-1)],xs[3:length(xs)],ys[3:length(xs)]) - avbear<- atan2(mapply(sum,sin(bear1*(pi/180))+sin(bear2*(pi/180))),mapply(sum,cos(bear1*(pi/180))+cos(bear2*(pi/180))))*(180/pi) - - #Take the average of the two - avbear<- c(avbear[1],avbear,avbear[length(avbear)]) - - #Calculate the destinated point taking a begin point, a bearing and a certain distance to travel - outpointr <- destFromBearing(xs,ys,(avbear+90+360)%%360,gearWidth/2) - outpointl <- destFromBearing(xs,ys,(avbear-90+360)%%360,gearWidth/2) - - singlePolygons <- list() - for(j in 1:(nrow(outpointr)-1)){ - singlePolygons[[j]] <- Polygon(cbind(c(outpointr[j,1],outpointl[j,1],outpointl[j+1,1],outpointr[j+1,1],outpointr[j,1]), - c(outpointr[j,2],outpointl[j,2],outpointl[j+1,2],outpointr[j+1,2],outpointr[j,2]))) - } - allPolygons[[counter]] <- Polygons(singlePolygons,ID=ac(counter)) - } -} -return(SpatialPolygons(allPolygons))} +#' Add width of gear to interpolated gear track +#' +#' Creates a SpatialPolygon object containing interpolated tracks including the +#' width of a gear and can be plotted as such. 
+#' +#' +#' @param interpolation interpolated dataset as obtained from the function +#' 'interpolateTacsat' +#' @param gearWidth Width of the gear in km +#' @return Returns an object of class 'SpatialPolygon' from the 'sp' package. +#' Contains all polygons per interpolation, where each interpolation consists +#' of several polygons. +#' @author Niels T. Hintzen +#' @seealso \code{\link{interpolateTacsat}}, \code{\link{destFromBearing}} +#' @references EU Lot 2 project +#' @examples +#' +#' require(sp) +#' +#' data(tacsat) +#' +#' #Sort the VMS data +#' tacsat <- sortTacsat(tacsat) +#' tacsat <- tacsat[1:1000,] +#' +#' #Filter the Tacsat data +#' tacsat <- filterTacsat(tacsat,c(4,8),hd=NULL,remDup=TRUE) +#' +#' #Interpolate the VMS data +#' interpolation <- interpolateTacsat(tacsat,interval=120,margin=10,res=100, +#' method="cHs",params=list(fm=0.5,distscale=20,sigline=0.2, +#' st=c(2,6)),headingAdjustment=0) +#' +#' res <- addWidth(interpolation,0.024) +#' plot(res) +#' +#' @export addWidth +addWidth <- function(interpolation,gearWidth){ + + +allPolygons <- list() +counter <- 0 +for(i in 1:length(interpolation)){ + + #Take the interpolated points + xs <- interpolation[[i]][-1,1] + ys <- interpolation[[i]][-1,2] + + if(all(is.na(xs))==F & all(is.na(ys))==FALSE){ + counter <- counter + 1 + #Calculate the bearing towards and away from each point + bear1 <- bearing(xs[1:(length(xs)-2)],ys[1:(length(xs)-2)],xs[2:(length(xs)-1)],ys[2:(length(xs)-1)]) + bear2 <- bearing(xs[2:(length(xs)-1)],ys[2:(length(xs)-1)],xs[3:length(xs)],ys[3:length(xs)]) + avbear<- atan2(mapply(sum,sin(bear1*(pi/180))+sin(bear2*(pi/180))),mapply(sum,cos(bear1*(pi/180))+cos(bear2*(pi/180))))*(180/pi) + + #Take the average of the two + avbear<- c(avbear[1],avbear,avbear[length(avbear)]) + + #Calculate the destination point taking a begin point, a bearing and a certain distance to travel + outpointr <- destFromBearing(xs,ys,(avbear+90+360)%%360,gearWidth/2) + outpointl <- 
destFromBearing(xs,ys,(avbear-90+360)%%360,gearWidth/2) + + singlePolygons <- list() + for(j in 1:(nrow(outpointr)-1)){ + singlePolygons[[j]] <- Polygon(cbind(c(outpointr[j,1],outpointl[j,1],outpointl[j+1,1],outpointr[j+1,1],outpointr[j,1]), + c(outpointr[j,2],outpointl[j,2],outpointl[j+1,2],outpointr[j+1,2],outpointr[j,2]))) + } + allPolygons[[counter]] <- Polygons(singlePolygons,ID=ac(counter)) + } +} +return(SpatialPolygons(allPolygons))} diff --git a/vmstools/R/af.R b/vmstools/R/af.R index a8064e3..6e4b0f0 100644 --- a/vmstools/R/af.R +++ b/vmstools/R/af.R @@ -1,4 +1,20 @@ -`af` <- -function(x){return(as.factor(x))} - -#hello world +#' shortcut for as.factor +#' +#' Change the class of an object to factor +#' +#' +#' @param x object to turn into factor +#' @return as.factor attempts to coerce its argument to factor type +#' @author Niels T. Hintzen +#' @seealso \code{\link{as.factor}} +#' @references EU Lot 2 project +#' @examples +#' +#' as.factor(5) #returns the number 5 as class 'factor' +#' af(5) #returns the number 5 also as class 'factor' +#' +#' @export af +`af` <- +function(x){return(as.factor(x))} + +#hello world diff --git a/vmstools/R/an.R b/vmstools/R/an.R index cde2bd1..436659f 100644 --- a/vmstools/R/an.R +++ b/vmstools/R/an.R @@ -1,3 +1,19 @@ -`an` <- -function(x){return(as.numeric(x))} - +#' shortcut for as.numeric +#' +#' Change the class of an object to numeric +#' +#' +#' @param x object to turn into numeric +#' @return as.numeric attempts to coerce its argument to numeric type +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{as.numeric}} +#' @references EU Lot 2 project +#' @examples +#' +#' as.numeric("5") #returns the character 5 as class 'numeric' +#' an("5") #returns the character 5 also as class 'numeric' +#' +#' @export an +`an` <- +function(x){return(as.numeric(x))} + diff --git a/vmstools/R/anf.r b/vmstools/R/anf.r index f876362..dc82088 100644 --- a/vmstools/R/anf.r +++ b/vmstools/R/anf.r @@ -1,2 +1,20 @@ - 'anf' <- - function(x) as.numeric(as.character(x)) # alias to convert factors +#' shortcut for as.numeric(as.character()) +#' +#' Change the class of an object from factor to numeric +#' +#' +#' @param x object to turn from factor into numeric +#' @return as.numeric attempts to coerce its argument to numeric type +#' @author Francois Bastardie +#' @seealso \code{\link{as.numeric}}, \code{\link{as.character}} +#' @references EU Lot 2 project +#' @examples +#' +#' +#' res <- as.factor(5.1) +#' an(res) #returns 1 +#' anf(res) #returns the original 5.1 +#' +#' @export anf + 'anf' <- + function(x) as.numeric(as.character(x)) # alias to convert factors diff --git a/vmstools/R/bearing.R b/vmstools/R/bearing.R index 10f1c13..13b89ed 100644 --- a/vmstools/R/bearing.R +++ b/vmstools/R/bearing.R @@ -1,17 +1,44 @@ -bearing <- function(lon,lat,lonRef,latRef){ - - x1 <- lon - y1 <- lat - x2 <- lonRef - y2 <- latRef - - y <- sin((x2-x1)*pi/180) * cos(y2*pi/180) - x <- cos(y1*pi/180) * sin(y2*pi/180) - sin(y1*pi/180) * cos(y2*pi/180) * cos((x2-x1)*pi/180) - bearing <- atan2(y,x)*180/pi - bearing <- (bearing + 360)%%360 - return(bearing)} - - -#bearing(3.15,51.64,3.15,51.65) - -#hello world \ No newline at end of file +#' Compute bearing between two points on a sphere (approximation of the earth) +#' at the starting point +#' +#' Compute the bearing between two GPS locations defined in longitude and +#' latitude notation on the earth. The earth is assumed to have a perfect +#' spherical shape. Bearing is returned in compass degrees. 
+#' +#' +#' @param lon Longitude of point 2 +#' @param lat Latitude of point 2 +#' @param lonRef Longitude of point 1 +#' @param latRef Latitude of point 1 +#' @author Niels T. Hintzen +#' @seealso \code{\link{km2Degree}}, \code{\link{degree2Km}}, +#' \code{\link{lonLatRatio}},\code{\link{distance}} +#' @references EU Lot 2 project, based on the Haversine formula, see also: +#' Hintzen et al. 2010 Fisheries Research +#' @examples +#' +#' lon <- -4 +#' lat <- 50 +#' lonRef <- -4.2 +#' latRef <- 51 +#' +#' bearing(lon,lat,lonRef,latRef) #352.8271 +#' +#' @export bearing +bearing <- function(lon,lat,lonRef,latRef){ + + x1 <- lon + y1 <- lat + x2 <- lonRef + y2 <- latRef + + y <- sin((x2-x1)*pi/180) * cos(y2*pi/180) + x <- cos(y1*pi/180) * sin(y2*pi/180) - sin(y1*pi/180) * cos(y2*pi/180) * cos((x2-x1)*pi/180) + bearing <- atan2(y,x)*180/pi + bearing <- (bearing + 360)%%360 + return(bearing)} + + +#bearing(3.15,51.64,3.15,51.65) + +#hello world diff --git a/vmstools/R/bindAllMergedTables.r b/vmstools/R/bindAllMergedTables.r index 79ac5b3..169bb55 100644 --- a/vmstools/R/bindAllMergedTables.r +++ b/vmstools/R/bindAllMergedTables.r @@ -1,69 +1,184 @@ - - -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## - bindAllMergedTables <- - function (vessels=character(), a.year='2009',species.to.keep=character(), - folder = file.path(), all.in.one.table=FALSE){ - - - bindAll <- function(vessels, a.year, species.to.keep, what, folder){ - - all.merged <- NULL # init - count <- 0 - for(a.vesselid in as.character(vessels)){ - count <- count+1 - print(count/length(vessels)*100) - cat(paste("load 'merged' for",a.vesselid,"\n")) - er <- try(load(file = file.path(folder, paste('merged_',a.vesselid,'_',a.year,'.RData',sep=''))) - , silent=TRUE) # get the 'merged' table for this vessel - - - if(class(er)!="try-error") { - if(length(species.to.keep)!=0){ - get.sp <- function (nm) unlist(lapply(strsplit(nm, split="_"), function(x) x[3])) - nm <- 
#' Bind all individual merged tables into a big one
#'
#' Bind all individual merged tables into one table.
#'
#' Possibly, merge irrelevant species into an other category to reduce the size
#' of the output object. Possibly, keep only the weight, or the value per
#' species. Both by default, but possible out of memory crash.
#'
#' @param vessels Vessel ID
#' @param a.year Year of the data
#' @param species.to.keep Array of species to keep
#' @param folder Local folder path
#' @param all.in.one.table Logical: if all data should be merged into one table
#' @author Francois Bastardie
#' @examples
#'
#' \dontrun{
#' data(eflalo)
#' data(tacsat)
#' data(euharbours); euharbours <- harbours
#'
#' # format
#' eflalo <- formatEflalo(eflalo)
#' tacsat <- formatTacsat(tacsat)
#'
#' # order tacsat chronologically with library(doBy)
#' tacsat <- sortTacsat(tacsat)
#'
#' # test each ping if in harbour or not
#' tacsat$SI_HARB <- NA
#' euharbours$Description <- euharbours$harbour
#' tacsat$SI_HARB <- pointInHarbour(lon=anf(tacsat$SI_LONG),
#'                                  lat=anf(tacsat$SI_LATI),
#'                                  harbours=euharbours,
#'                                  rowSize=30, returnNames=TRUE)
#' inHarb <- tacsat$SI_HARB
#' inHarb <- replace(inHarb, !is.na(inHarb), 1)
#' inHarb <- replace(inHarb, is.na(inHarb), 0)
#' inHarb <- as.numeric(inHarb)
#'
#' # assign a trip identifier
#' tacsat$SI_FT <- 1 # init
#' idx <- which(inHarb==0)
#' tacsat[idx,"SI_FT"] <- cumsum(inHarb) [idx] # add a SI_FT index
#'
#' # keep 'out of harbour' points only
#' # (but keep the departure point and the arrival point lying in the harbour)
#' startTrip <- c(diff(tacsat[,"SI_FT"]), 0)
#' endTrip   <- c(0, diff(tacsat[,"SI_FT"]))
#' tacsat[which(startTrip>0),"SI_FT"] <- tacsat[which(startTrip>0)+1,"SI_FT"]
#' tacsat[which(endTrip<0),"SI_FT"]  <- tacsat[which(endTrip<0)-1,"SI_FT"]
#' tacsat <- tacsat[which(inHarb==0 | startTrip>0 | endTrip<0),]
#'
#' # assign a state to each ping (here, useless if detectFishing at TRUE)
#' tacsat$SI_STATE <- 2 # init (1: fishing; 2: steaming)
#' # fake speed rule for fishing state
#' tacsat$SI_STATE[(tacsat$SI_SP>4 & tacsat$SI_SP<8)] <- 1
#'
#' # reduce the size of the eflalo data by merging species
#' # (assuming that the other species is coded MZZ), threshold in euros.
#' eflalo2 <- poolEflaloSpecies(eflalo, threshold=1e6, code="MZZ")
#'
#' # debug if eflalo has not been cleaned earlier
#' eflalo <- eflalo[!eflalo$VE_REF=="NA" & !is.na(eflalo$VE_REF),]
#'
#' # an informed VE_FLT is also required
#' if(all(is.na(eflalo$VE_FLT))) eflalo$VE_FLT <- "fleet1"
#'
#' # possible mis-naming mistakes
#' if(!match('LE_MET_level6',colnames(eflalo))>0){
#'   eflalo$LE_MET_level6 <- eflalo$LE_MET
#' }
#'
#' # debug
#' eflalo <- eflalo[eflalo$LE_MET!="No_logbook6",]
#'
#' # TEST FOR A GIVEN SET OF VESSELS
#' # (if detect.fishing is true then do also detection of fishing activity
#' # e.g. if speed='segment' the segmentTacsatSpeed() automatic detection of
#' # fishing states that will overwrite the existing SI_STATE)
#' mergeEflalo2Pings(eflalo=eflalo, tacsat=tacsat, vessels=c("738", "804"),
#'                   general=list(output.path=file.path("C:","output"),
#'                                visual.check=TRUE, detectFishing=TRUE,
#'                                speed="segment", what.speed="calculated"))
#' # ...OR APPLY FOR ALL VESSELS IN eflalo
#' mergeEflalo2Pings(eflalo=eflalo, tacsat=tacsat,
#'                   general=list(output.path=file.path("C:","output"),
#'                                visual.check=TRUE, detectFishing=TRUE,
#'                                speed="segment", what.speed="calculated"))
#' gc(reset=TRUE)
#'
#' # load the merged output table for one vessel
#' load(file.path("C:","output","merged_804_1800.RData"))
#'
#' # check the conservation of landings
#' sum(tapply(anf(merged$LE_KG_PLE), merged$flag, sum, na.rm=TRUE))
#' sum(eflalo[eflalo$VE_REF=="804","LE_KG_PLE"], na.rm=TRUE)
#'
#' # ...or bind all vessels (keeping only some given species here)
#' bindAllMergedTables(vessels=c("738", "804"), a.year = "1800",
#'                     species.to.keep=c("PLE","COD"),
#'                     folder = file.path("C:","output"),
#'                     all.in.one.table=TRUE)
#'
#' # ...and load the merged output table for all vessels
#' load(file.path("C:","output","all_merged__1800.RData"))
#'
#' }
#'
#' @export bindAllMergedTables
bindAllMergedTables <- function(vessels = character(), a.year = '2009',
                                species.to.keep = character(),
                                folder = file.path(), all.in.one.table = FALSE){

  #- Inner worker: load the per-vessel 'merged' tables found in 'folder',
  #  keep the essential columns ('what' selects weight and/or value columns),
  #  stack all vessels and save the result back into 'folder'.
  bindAll <- function(vessels, a.year, species.to.keep, what, folder){

    all.merged <- NULL   # accumulator for the stacked per-vessel tables
    count <- 0
    for(a.vesselid in as.character(vessels)){
      count <- count + 1
      print(count / length(vessels) * 100)   # progress in percent
      cat(paste("load 'merged' for", a.vesselid, "\n"))
      #- Loads the object 'merged' for this vessel into this frame
      er <- try(load(file = file.path(folder,
                       paste('merged_', a.vesselid, '_', a.year, '.RData', sep = ''))),
                silent = TRUE)

      #- BUGFIX-STYLE: use inherits() instead of class(er)!="try-error";
      #  class() may return a vector of length > 1.
      if(!inherits(er, "try-error")) {
        if(length(species.to.keep) != 0){
          #- Species code is the third token of e.g. "LE_KG_COD"
          get.sp <- function(nm) unlist(lapply(strsplit(nm, split = "_"), function(x) x[3]))
          nm <- names(merged)
          idx.col.w <- grep('KG', nm) # index columns weight
          all.sp <- get.sp(nm[idx.col.w])
          species.to.merge <- all.sp[!all.sp %in% species.to.keep]
          #- Merge all non-kept species into the 'other' category
          #  (assumes LE_EURO_MZZ / LE_KG_MZZ columns exist -- TODO confirm
          #  with poolEflaloSpecies() output)
          merged$LE_EURO_MZZ <- replace(merged$LE_EURO_MZZ, is.na(merged$LE_EURO_MZZ), 0)
          merged$LE_EURO_MZZ <- merged$LE_EURO_MZZ +
            apply(merged[, paste('LE_EURO_', species.to.merge, sep = '')], 1, sum, na.rm = TRUE)
          merged <- merged[, !colnames(merged) %in% paste('LE_EURO_', species.to.merge, sep = '')]
          merged$LE_KG_MZZ <- replace(merged$LE_KG_MZZ, is.na(merged$LE_KG_MZZ), 0)
          merged$LE_KG_MZZ <- merged$LE_KG_MZZ +
            apply(merged[, paste('LE_KG_', species.to.merge, sep = '')], 1, sum, na.rm = TRUE)
          merged <- merged[, !colnames(merged) %in% paste('LE_KG_', species.to.merge, sep = '')]
        }
        nm <- names(merged)
        idx.col.w <- grep('KG', nm)    # index columns with species weight
        idx.col.v <- grep('EURO', nm)  # index columns with species value
        if(length(what) == 0){
          idx.col <- c(idx.col.w, idx.col.v) # get all (but possible out of memory crash)
        } else {
          if(what == 'weight') idx.col <- idx.col.w
          if(what == 'value')  idx.col <- idx.col.v
        }
        # KEEP THE ESSENTIAL
        merged <- merged[, c("VE_REF", "FT_REF", "VE_FLT", "LE_MET_level6", "LE_GEAR",
                             "SI_LATI", "SI_LONG", "SI_SP", "SI_HE", "SI_STATE",
                             "SI_DATE", "SI_TIME", "SI_HARB",
                             nm[idx.col], 'LE_EFF_VMS', 'KW_HOURS',
                             "flag")]
        all.merged <- rbind.data.frame(all.merged, merged)
      } else cat(paste("failure for", a.vesselid, "\n"))
      print(nrow(all.merged))
    }
    #- Save; with what=character() the paste() collapses to 'all_merged__<year>'
    save("all.merged", file = file.path(folder,
           paste('all_merged_', what, '_', a.year, '.RData', sep = '')))

    return()
  }

  # calls
  bindAll(vessels, a.year, species.to.keep, what = 'weight', folder)
  bindAll(vessels, a.year, species.to.keep, what = 'value', folder)
  if(all.in.one.table) bindAll(vessels, a.year, species.to.keep, what = character(), folder)

  return()
}
+#' @param table_variables Transpose the dataset (change variables into +#' individuals) +#' @param scree Implementation of "scree-test" +#' @param select_species Remove the cluster with the smallest mean of capture +#' @param building_tab_pca Build the table with the main species +#' @param test.values Compute the test-value for each species by cluster +#' @param targetspecies Determine the species with a test-value > 1.96 by +#' cluster +#' @param withinVar Calculate the cluster's within-variance +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @references Development of tools for logbook and VMS data analysis. Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @export building_tab_pca +building_tab_pca=function(data,especes){ + p=ncol(data) + noms=colnames(data) + ind_princ=which(is.element(noms,especes)) + princ=data[,ind_princ] + return(princ) +} diff --git a/vmstools/R/c.listquote.r b/vmstools/R/c.listquote.r index 7f879c7..c506b63 100644 --- a/vmstools/R/c.listquote.r +++ b/vmstools/R/c.listquote.r @@ -1,23 +1,42 @@ -c.listquote <- function( ... 
#' Produce quoted list for use within data.table
#'
#' Produce the quoted list format from a character vector which is used as
#' input for the data.table format.
#'
#' The character vector could contain e.g. colnames(x) or c("COL1","COL2","COL3")
#' where COL1 etc are column names.
#'
#' @param \dots character vector
#' @return Returns a quoted list, e.g. quote(list(COL1, COL2, COL3))
#' @note With great thanks to original author.
#' @author Niels T. Hintzen
#' @seealso See \code{\link{data.table}} documentation
#' @references See \code{\link{data.table}} documentation
#' @examples
#'
#' c.listquote(c("COL1","COL2","COL3")) #Returns: list(COL1,COL2,COL3)
#'
#' @export c.listquote
c.listquote <- function( ... ) {

  args <- as.list( match.call()[ -1 ] )
  lstquote <- list( as.symbol( "list" ) );
  for ( i in args ) {
    # Evaluate expression in parent environment to see what it refers to.
    # BUGFIX: use is.name()/is.call()/is.character() instead of comparing
    # class(i) with "==": class() may return a vector of length > 1, and
    # `||` on a length > 1 condition is an error as of R 4.3.
    if ( is.name( i ) || ( is.call( i ) && i[[1]] != "list" ) ) {
      i <- eval( substitute( i ), sys.frame( sys.parent() ) )
    }
    if ( is.call( i ) && i[[1]] == "list" ) {
      # Already a list(...) call: splice its arguments in directly
      lstquote <- c( lstquote, as.list( i )[ -1 ] )
    }
    else if ( is.character( i ) )
    {
      # Parse each string into an unevaluated symbol/expression
      for ( chr in i ) {
        lstquote <- c( lstquote, list( parse( text=chr )[[1]] ) )
      }
    }
    else
      stop( paste( "[", deparse( substitute( i ) ), "] Unknown class [", class( i ), "] or is not a list()", sep="" ) )
  }
  return( as.call( lstquote ) )
}
#' Computing the Minimum Convex Polygon (MCP)
#'
#' This function computes the Minimum Convex Polygon (MCP) from a set of
#' points. The MCP is the minimum area polygon containing a set of point
#' locations.
#'
#' This function is most powerful when used repetitively within a loop to
#' compute the MCP for subsets of points stored in a large data table.
#'
#' @param id Provide a unique integer to identify an MCP from others that you
#' may construct with other data points
#' @param points Two-column matrix or data frame of point coordinates
#' @param filename A character name for an ASCII output file
#' @param verbose Boolean: set to TRUE if extended processing feedback is
#' wanted
#' @param pct Integer 0 <= pct <= 100, the percentage of the MCP for which area
#' is provided
#' @return A list: \item{MCP.area}{The area of the MCP in square kilometers}
#' \item{MCP.pct}{The desired percentage of the MCP for which the area is
#' computed} \item{MCP.coords}{A matrix containing MCP vertices. Each row
#' represents a unique point, the first column contains x-coordinates, and the
#' second, y-coordinates}
#' @note Results are stored in the r.MCP object (required for graphical
#' visualization using plot_mcp). This function can be used on its own (once)
#' or repetitively in a loop to process grouped point data stored in a larger
#' table. When used repetitively, be sure to increment the id parameter to
#' ensure that each MCP has a unique identifier. The output ASCII coordinate
#' file can be further processed using the makeshapes function to generate an
#' ESRI Shapefile for MCP polygons.
#' @author Randy Bui, Ron N. Buliung, Tarmo K. Remmel
#' @references Builds upon MCP functions available in the adehabitat package
#' @examples
#'
#' data(tacsat)
#' calc_mcp(id=1, points = tacsat[1:10,c("SI_LONG","SI_LATI")], filename="MCP_Output.txt",
#'          verbose = FALSE, pct = 100)
#'
#' @export calc_mcp
calc_mcp <- function (id = 1, points = NULL, filename = "MCP_Output.txt",
    verbose = FALSE, pct = 100)
{
    #- Fail fast if the supporting package is missing: require() on its own
    #  only warns and returns FALSE, leading to an obscure error at mcp().
    if (!require(adehabitat, quietly = TRUE))
        stop("calc_mcp() needs the 'adehabitat' package (function mcp)")

    #- Home-range area helper, taken from the gpclib package: computes the
    #  MCP area per id for each requested percentage level.
    mcp.area <- function(xy, id, percent = seq(20, 100, by = 5),
        unin = c("m","km"), unout = c("ha", "km2", "m2"), plotit = TRUE){
        unin <- match.arg(unin)
        unout <- match.arg(unout)
        if (length(id) != nrow(xy))
            stop("xy and id should be of the same length")
        #- BUGFIX: subset id with the SAME mask as xy. The original filtered
        #  xy first and then applied !is.na() to the already-filtered xy,
        #  which left id unfiltered (misaligned) whenever NAs were present.
        ok <- !is.na(xy[, 1]) & !is.na(xy[, 2])
        xy <- xy[ok, ]
        id <- id[ok]
        lev <- percent
        ar <- matrix(0, nrow = length(lev), ncol = nlevels(factor(id)))
        lixy <- split(xy, id)
        le <- names(lixy)
        for (i in 1:length(lev)) {
            ar[i, ] <- unlist(lapply(lixy, function(z) {
                res <- mcp(z, rep(1, nrow(z)), percent = lev[i])
                class(res) <- "data.frame"
                return(area.poly(as(res[, 2:3], "gpc.poly")))
            }))
        }
        ar <- as.data.frame(ar)
        names(ar) <- le
        #- Convert from the input unit to the requested output unit
        if (unin == "m") {
            if (unout == "ha")
                ar <- ar/10000
            if (unout == "km2")
                ar <- ar/1e+06
        }
        if (unin == "km") {
            if (unout == "ha")
                ar <- ar * 100
            if (unout == "m2")
                ar <- ar * 1e+06
        }
        row.names(ar) <- lev
        class(ar) <- c("hrsize", "data.frame")
        attr(ar, "units") <- unout
        if (plotit)
            plot(ar)
        return(ar)
    }


    #- Input validation: mimic the original error-code protocol (return the
    #  string "ERROR" and optionally print the code) rather than stop().
    errorcode <- 1000
    if ((pct > 100) || (pct < 0)) {
        errorcode <- 100
        if (verbose) {
            cat("\n\nWARNING: Supplied percentage must be between 0 and 100 (inclusive).")
            cat("\nERROR CODE: ", errorcode, "\n\n", sep = "")
        }
        return("ERROR")
    }
    if (length(dim(points)) != 2) {
        errorcode <- 71
        if (verbose) {
            cat("\n\nWARNING: Provided points input matrix has fewer than 2 columns.")
            cat("\nERROR CODE: ", errorcode, "\n\n", sep = "")
        }
        return("ERROR")
    }
    if (dim(points)[2] != 2) {
        errorcode <- 70
        if (verbose) {
            cat("\n\nWARNING: Provided points input matrix has too many columns, only 2 are allowed.")
            cat("\nERROR CODE: ", errorcode, "\n\n", sep = "")
        }
        return("ERROR")
    }
    else {
        #- All points form one group (factor of 1's); compute polygon + area
        temp <- as.data.frame(cbind(1, points))
        temp[, 1] <- as.factor(temp[, 1])
        MCP <- (mcp(temp[, 2:3], temp[, 1], percent = pct))
        area <- mcp.area(temp[, 2:3], temp[, 1], percent = pct,
            unin = "m", unout = "km2", plotit = FALSE)
    }
    #- Append the polygon vertices to the ASCII output file
    coordsMCP <- cbind(id, MCP)
    tmp <- coordsMCP[, 2:4]
    outtabMCP <- cbind(id, tmp)
    write.table(outtabMCP, sep = ",", append = TRUE, file = filename,
        col.names = FALSE)
    #- Deliberate side effect: store results globally for plot_mcp (see @note)
    r.MCP <- list(MCP = MCP, points = points, id = id, MCP.area = area,
        MCP.pct = pct)
    assign("r.MCP", r.MCP, pos = 1)
    mcp.result <- list(id = id, MCP.area = area, MCP.pct = pct,
        MCP.coords = MCP)
    return(mcp.result)
}
#' Calculate the CI between two succeeding VMS datapoints
#'
#' The interpolated tracks can be surrounded by a sort of confidence interval
#' representing the outer region a vessel could have travelled between two
#' succeeding datapoints. Within this function the CI's are computed.
#'
#'
#' @param int interpolation, as data.frame from 'interpolateTacsat'
#' @param tacint tacsat records (two rows) corresponding with the interpolation
#' @param params list of parameters used to perform interpolation
#' @param grid object of class 'GridTopology' specifying the grid dimensions
#' @param spatialGrid large spatial grid ('SpatialGrid' /
#' 'SpatialGridDataFrame') the CI subgrid is cut from; must cover the CI region
#' @param plot Logical. Whether the result of the interpolation must be plotted
#' @return Returns the Confidence Interval on a grid of class
#' 'SpatialGridDataFrame' with the CI values in the data slot.
#' @note Computation can take a very long time if either grid resolution is
#' high or if many interpolations are used.
#' @author Niels T. Hintzen
#' @seealso \code{\link{interpolateTacsat}},\code{\link{maxRangeCI}}
#' @references Hintzen et al. 2010 Improved estimation of trawling tracks using
#' cubic Hermite spline interpolation of position registration data, EU lot 2
#' project
#' @examples
#'
#' data(tacsat)
#'
#' #Sort the Tacsat data
#' tacsat <- sortTacsat(tacsat)
#' tacsat <- tacsat[1:1000,]
#'
#' #Filter the Tacsat data
#' tacsat <- filterTacsat(tacsat,c(2,6),hd=NULL,remDup=TRUE)
#'
#' #Interpolate the VMS data
#' interpolation <- interpolateTacsat(tacsat,interval=120,margin=10,
#'                    res=100,method="cHs",params=list(fm=0.5,distscale=20,
#'                    sigline=0.2,st=c(2,6)),headingAdjustment=0)
#'
#' #Create the final grid where all interpolations should fit on
#' xrange <- c(2,3); yrange <- c(51,52)
#' grid <- createGrid(xrange,yrange,resx=0.01,resy=0.005)
#'
#' res <- calculateCI(interpolation[[4]],
#'                    tacsat[interpolation[[4]][1,],],
#'                    params=list(fm=0.25,distscale=3.1,sigline=0.4,st=c(2,6)),
#'                    grid=grid,
#'                    plot=TRUE)
#'
#' @export calculateCI
calculateCI <- function( int
                        ,tacint
                        ,params
                        ,grid
                        ,spatialGrid
                        ,plot=FALSE){

  #- Derive the date-time column if absent.
  #  BUGFIX: the original assigned 'tacint$SI_DATIMIM' (typo), which left
  #  SI_DATIM undefined although it is read just below.
  if (!"SI_DATIM" %in% colnames(tacint))
    tacint$SI_DATIM <- as.POSIXct(paste(tacint$SI_DATE, tacint$SI_TIME,
        sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M")
  #- Maximum range the vessel could have covered between the two pings, from
  #  elapsed time and the larger of observed vs. implied interpolation speed
  mxr <- maxRangeCI(x =c(int[-1,1][1],rev(int[-1,1])[1]),
                    y =c(int[-1,2][1],rev(int[-1,2])[1]),
                    Time=an(difftime(tacint$SI_DATIM[2],tacint$SI_DATIM[1],units="mins")),
                    speed=pmax(tacint$SI_SP,rep(distanceInterpolation(list(int)) / 1.852 /
                          an(difftime(tacint$SI_DATIM[2],tacint$SI_DATIM[1],units="hours")),2)))

  if(plot){
    par(mfrow=c(2,2))
    plot(mxr[[1]][,1],mxr[[1]][,2],type="l",xlab="Longitude",ylab="Latitude",asp=1/lonLatRatio(mxr[[1]][1,1],mxr[[1]][1,2]),main=paste(int[1,]))
    lines(int[-1,1],int[-1,2],col=2)
  }

  #- Bounding box of the max range, stretched outwards by 10% of the
  #  interpolation extent wherever the interpolation sticks out of it
  xrange <- range(mxr[[1]][,1]); yrange <- range(mxr[[1]][,2])
  xrg <- range(int[-1,1]); yrg <- range(int[-1,2])
  if(xrange[1] > xrg[1]) xrange[1] <- xrg[1] - diff(xrg)*0.1
  if(xrange[2] < xrg[2]) xrange[2] <- xrg[2] + diff(xrg)*0.1
  if(yrange[1] > yrg[1]) yrange[1] <- yrg[1] - diff(yrg)*0.1
  #- BUGFIX: stretch the upper y-bound upwards ('+'), mirroring the x-case;
  #  the original used '-' and shrank the box instead of expanding it
  if(yrange[2] < yrg[2]) yrange[2] <- yrg[2] + diff(yrg)*0.1

  #- Snap the ranges outwards onto the grid definition
  newxrange <- c((xrange[1] - grid@cellcentre.offset[1]) %/%grid@cellsize[1] * grid@cellsize[1] + grid@cellcentre.offset[1],
                 (xrange[2] - grid@cellcentre.offset[1] + grid@cellsize[1])%/%grid@cellsize[1] * grid@cellsize[1] + grid@cellcentre.offset[1])
  newyrange <- c((yrange[1] - grid@cellcentre.offset[2]) %/%grid@cellsize[2] * grid@cellsize[2] + grid@cellcentre.offset[2],
                 (yrange[2] - grid@cellcentre.offset[2] + grid@cellsize[2])%/%grid@cellsize[2] * grid@cellsize[2] + grid@cellcentre.offset[2])

  origxs <- seq(grid@cellcentre.offset[1],grid@cellcentre.offset[1] + grid@cellsize[1] * (grid@cells.dim[1]-1),by= grid@cellsize[1])
  origys <- seq(grid@cellcentre.offset[2],grid@cellcentre.offset[2] + grid@cellsize[2] * (grid@cells.dim[2]-1),by= grid@cellsize[2])

  #- The CI box must intersect the supplied large grid
  if((min(newxrange) > bbox(spatialGrid)[1,"max"]) | (max(newxrange) < bbox(spatialGrid)[1,"min"])) stop("CI outside grid")
  if((min(newyrange) > bbox(spatialGrid)[2,"max"]) | (max(newyrange) < bbox(spatialGrid)[2,"min"])) stop("CI outside grid")

  #- Indices of the grid cells closest to the snapped ranges
  subxs <- apply(abs(outer(origxs,newxrange,"-")),2,which.min)
  subys <- apply(abs(outer(origys,newyrange,"-")),2,which.min)

  #- Cut the CI subgrid out of the large grid (row order of a SpatialGrid
  #  runs top-down, hence the cells.dim[2] - ... + 1 reversal)
  grid <- spatialGrid[sort(grid@cells.dim[2]-seq(subys[1],subys[2])+1),seq(subxs[1],subxs[2])]
  grid <- as(grid,"SpatialGridDataFrame")
  coords <- coordinates(grid)

  # Distance to begin or end point
  bpDistan <- distance(tacint$SI_LONG[1],tacint$SI_LATI[1],coords[,1],coords[,2])
  epDistan <- distance(tacint$SI_LONG[2],tacint$SI_LATI[2],coords[,1],coords[,2])
  pdistan <- pmin(bpDistan,epDistan)

  if(plot){ image(t(matrix(pdistan,ncol=grid@grid@cells.dim[1],nrow=grid@grid@cells.dim[2],byrow=TRUE)[grid@grid@cells.dim[2]:1,]),col=rev(heat.colors(12))); box()}

  # Distance of each grid cell to the interpolated track
  intDistan <- do.call(pmin,lapply(as.list(2:nrow(int)),function(x){distance(int[x,1],int[x,2],coords[,1],coords[,2])}))

  if(plot){ image(t(matrix(intDistan,ncol=grid@grid@cells.dim[1],nrow=grid@grid@cells.dim[2],byrow=TRUE)[grid@grid@cells.dim[2]:1,]),col=rev(heat.colors(12))); box()}

  #- CI value per cell from the two distance kernels
  CI <- N1p0(intDistan*params$distscale,0,pdistan^params$sigline,0)
  if(max(CI,na.rm=TRUE) < 0.1) warning("Prediction max(CI) is very small")

  if(plot){ image(t(matrix(CI,ncol=grid@grid@cells.dim[1],nrow=grid@grid@cells.dim[2],byrow=TRUE)[grid@grid@cells.dim[2]:1,]),col=rev(heat.colors(12))); box()}
  grid@data <- data.frame(data=rep(0,nrow(coords)))
  grid@data$data <- CI
  return(grid)
}
interval between succeeding points - tacsatp <- intervalTacsat(tacsat,level=level,weight=weight,fill.na=fill.na) - - if(!"SI_DATIM" %in% colnames(tacsatp)) tacsatp$SI_DATIM <- as.POSIXct(paste(tacsatp$SI_DATE, tacsatp$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") - if(level=="trip"){ - if(is.null(tacsatp$FT_REF)==TRUE) stop("no tripnumber available to merge on trip level") - sptacsat <- split(tacsatp,tacsatp$VE_REF) - tacsatp$SI_SPCA <- unlist(lapply(sptacsat,function(x){ - - FT_REF <- as.factor(x$FT_REF); - res <- by(x,FT_REF, - function(y){ - if(nrow(y)>1){ - interval <- y$INTV - distance_xmin1 <- c(NA,distance(y$SI_LONG[2:nrow(y)], y$SI_LATI[2:nrow(y)], - y$SI_LONG[1:(nrow(y)-1)],y$SI_LATI[1:(nrow(y)-1)])) - distance_xplus1 <- c(distance_xmin1[-1],NA) - if(any(weight == 0)){ - if(weight[1] == 0) SI_SPCA <- distance_xplus1 / (interval/60) - if(weight[2] == 0) SI_SPCA <- distance_xmin1 / (interval/60) - } else { - difftime_xmin1 <- intervalTacsat(y,level="trip",weight=c(1,0),fill.na=fill.na)$INTV - difftime_xplus1 <- intervalTacsat(y,level="trip",weight=c(0,1),fill.na=fill.na)$INTV - SI_SPCA <- 2*(2* ((distance_xmin1 * weight[1]) / (difftime_xmin1/60)) * (distance_xplus1 * weight[2] / (difftime_xplus1/60)) / - (((distance_xmin1 * weight[1]) / (difftime_xmin1/60)) + (distance_xplus1 * weight[2] / (difftime_xplus1/60)))) - } - #- If INTV equals NA, then check if there are other possibilities to calculate the interval rate based on a different - # weight setting. - if(fill.na==TRUE){ - idx <- which(is.na(SI_SPCA)==TRUE) - if(weight[1] == 0 | weight[2] == 0){ - speeds <- cbind((distance_xmin1[idx] * weight[1]) / (interval[idx]/60), - (distance_xplus1[idx] * weight[2]) / (interval[idx]/60)) - } else { - speeds <- cbind((distance_xmin1[idx] * weight[1]) / (difftime_xmin1[idx] /60), - (distance_xplus1[idx] * weight[2]) / (difftime_xplus1[idx]/60)) - } - #- Use of 0.5 if ifelse = NA, as counter to 2*... 
- SI_SPCA[idx] <- apply(speeds,1,function(x){isfin <- is.finite(x); return(ifelse(all(isfin),mean(x),ifelse(any(isfin),x[isfin],NA)))}) - #SI_SPCA[idx] <- 2*(2* ifelse(is.na(speeds[,1])==TRUE,0.5,speeds[,1]) * ifelse(is.na(speeds[,2])==TRUE,0.5,speeds[,2]) / - # (ifelse(is.na(speeds[,1])==TRUE,0,speeds[,1]) + ifelse(is.na(speeds[,2])==TRUE,0,speeds[,2]))) - if(length(which(SI_SPCA[idx]==0))>0) SI_SPCA[idx][which(SI_SPCA[idx]==0)]<- NA - } - - return(SI_SPCA) - } else { - return(NA) - } - }) - return(unsplit(res,FT_REF))})) - tacsatp$SI_SPCA[which(tacsatp$FT_REF == "0")] <- NA - - } - if(level=="vessel"){ - #- Take the interval for weights c(0,1) and c(1,0) direct from tacsat - interval <- tacsatp$INTV - distance_xmin1 <- c(NA,distance(tacsatp$SI_LONG[2:nrow(tacsatp)], tacsatp$SI_LATI[2:nrow(tacsatp)], - tacsatp$SI_LONG[1:(nrow(tacsatp)-1)], tacsatp$SI_LATI[1:(nrow(tacsatp)-1)])) - distance_xplus1 <- c(distance_xmin1[-1],NA) - #- Recalculate the interval rate for either outer averages - difftime_xmin1 <- intervalTacsat(tacsatp,level="vessel",weight=c(1,0),fill.na=fill.na)$INTV - difftime_xplus1 <- intervalTacsat(tacsatp,level="vessel",weight=c(0,1),fill.na=fill.na)$INTV - if(any(weight == 0)){ - if(weight[1] == 0) SI_SPCA <- distance_xplus1 / (interval/60) - if(weight[2] == 0) SI_SPCA <- distance_xmin1 / (interval/60) - } else { - SI_SPCA <- 2*(2* ((distance_xmin1 * weight[1]) / (difftime_xmin1/60)) * (distance_xplus1 * weight[2] / (difftime_xplus1/60)) / - (((distance_xmin1 * weight[1]) / (difftime_xmin1/60)) + (distance_xplus1 * weight[2] / (difftime_xplus1/60)))) - } - #- If INTV equals NA, then check if there are other possibilities to calculate the interval rate based on a different - # weight setting. 
- if(fill.na==TRUE){ - idx <- which(is.na(SI_SPCA)==TRUE) - if(weight[1] == 0 | weight[2] == 0){ - speeds <- cbind((distance_xmin1[idx] * weight[1]) / (interval[idx]/60), - (distance_xplus1[idx] * weight[2]) / (interval[idx]/60)) - } else { - speeds <- cbind((distance_xmin1[idx] * weight[1]) / (difftime_xmin1[idx] /60), - (distance_xplus1[idx] * weight[2]) / (difftime_xplus1[idx]/60)) - } - #- Use of 0.5 if ifelse = NA, as counter to 2*... - SI_SPCA[idx] <- apply(speeds,1,function(x){isfin <- is.finite(x); return(ifelse(all(isfin),mean(x),ifelse(any(isfin),x[isfin],NA)))}) - #SI_SPCA[idx] <- 2*(2* ifelse(is.na(speeds[,1])==TRUE,0.5,speeds[,1]) * ifelse(is.na(speeds[,2])==TRUE,0.5,speeds[,2]) / - # (ifelse(is.na(speeds[,1])==TRUE,0,speeds[,1]) + ifelse(is.na(speeds[,2])==TRUE,0,speeds[,2])))) - if(length(which(SI_SPCA[idx]==0))>0) SI_SPCA[idx][which(SI_SPCA[idx]==0)]<- NA - } - tacsatp$SI_SPCA <- SI_SPCA - - vessels <- unique(tacsatp$VE_REF) - first.vessels <- unlist(lapply(as.list(vessels),function(x){which(tacsatp$VE_REF==x)[1]})) - last.vessels <- unlist(lapply(as.list(vessels),function(x){rev(which(tacsatp$VE_REF==x))[1]})) - if(weight[1] != 0) tacsatp$SI_SPCA[first.vessels] <- NA - if(weight[2] != 0) tacsatp$SI_SPCA[last.vessels] <- NA - if(fill.na==TRUE) tacsatp$SI_SPCA[first.vessels] <- distance_xplus1[first.vessels] / (interval[first.vessels]/60) - if(fill.na==TRUE) tacsatp$SI_SPCA[last.vessels] <- distance_xmin1[last.vessels] / (interval[last.vessels]/60) - - } - idx <- which(colnames(tacsatp) %in% c("INTV")) - #- Convert from km/h to knots / nautical miles an hour - tacsatp$SI_SPCA <- tacsatp$SI_SPCA / 1.852 -return(if(length(idx)>0){tacsatp[,-idx]}else{tacsatp})} \ No newline at end of file +#' Calculate the speed of the vessel at each ping +#' +#' Calculate the speed of a vessel based on the time and distance travelled +#' between pings. +#' +#' Note that the DEFAULT speed given is the difference between ping x and ping +#' x-1. 
Hence, the first ping of a vessel or trip does NOT have a calculated +#' speed and will display NA. +#' +#' With weight you can specify if the speed is calculated between ping x and +#' ping x-1 (weight = c(1,0)), if the speed is calculated between ping x and +#' ping x+1 (weight = c(0,1)) or an intermediate weight (weight = c(0.4,0.6) / +#' equal weight = c(0.5,0.5)). +#' +#' @param tacsat tacsat dataset +#' @param level level to get calculated speed at: trip or vessel +#' @param weight weight to apply to calculation of mean speed towards and away +#' from ping +#' @param fill.na If speed cannot be calculated based on default or provided +#' weight, take closest alternative to provide a speed +#' @return The original tacsat file is returned including a column: SI_SPCA +#' which holds the calculated speed in knots +#' @author Niels T. Hintzen +#' @seealso \code{\link{interpolateTacsat}},\code{\link{intervalTacsat}} +#' @references EU lot 2 project +#' @examples +#' +#' data(tacsat) +#' result <- calculateSpeed(tacsat[1:100,],level="vessel") +#' result <- calculateSpeed(tacsat[1:100,],level="vessel",weight=c(2,1),fill.na=TRUE) +#' +#' data(eflalo) +#' tacsatp <- mergeEflalo2Tacsat(eflalo,tacsat) +#' result <- calculateSpeed(tacsatp[1:100,],level="trip",weight=c(1,1),fill.na=FALSE) +#' +#' +#' +#' @export calculateSpeed +calculateSpeed <- function(tacsat,level="vessel",weight=c(1,1),fill.na=FALSE){ + + if(length(weight) != 2) stop("weight must be specified as a length 2 numeric vector") + weight <- weight / sum(weight,na.rm=TRUE) + + #- Add interval between succeeding points + tacsatp <- intervalTacsat(tacsat,level=level,weight=weight,fill.na=fill.na) + + if(!"SI_DATIM" %in% colnames(tacsatp)) tacsatp$SI_DATIM <- as.POSIXct(paste(tacsatp$SI_DATE, tacsatp$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") + if(level=="trip"){ + if(is.null(tacsatp$FT_REF)==TRUE) stop("no tripnumber available to merge on trip level") + sptacsat <- split(tacsatp,tacsatp$VE_REF) + 
tacsatp$SI_SPCA <- unlist(lapply(sptacsat,function(x){ + + FT_REF <- as.factor(x$FT_REF); + res <- by(x,FT_REF, + function(y){ + if(nrow(y)>1){ + interval <- y$INTV + distance_xmin1 <- c(NA,distance(y$SI_LONG[2:nrow(y)], y$SI_LATI[2:nrow(y)], + y$SI_LONG[1:(nrow(y)-1)],y$SI_LATI[1:(nrow(y)-1)])) + distance_xplus1 <- c(distance_xmin1[-1],NA) + if(any(weight == 0)){ + if(weight[1] == 0) SI_SPCA <- distance_xplus1 / (interval/60) + if(weight[2] == 0) SI_SPCA <- distance_xmin1 / (interval/60) + } else { + difftime_xmin1 <- intervalTacsat(y,level="trip",weight=c(1,0),fill.na=fill.na)$INTV + difftime_xplus1 <- intervalTacsat(y,level="trip",weight=c(0,1),fill.na=fill.na)$INTV + SI_SPCA <- 2*(2* ((distance_xmin1 * weight[1]) / (difftime_xmin1/60)) * (distance_xplus1 * weight[2] / (difftime_xplus1/60)) / + (((distance_xmin1 * weight[1]) / (difftime_xmin1/60)) + (distance_xplus1 * weight[2] / (difftime_xplus1/60)))) + } + #- If INTV equals NA, then check if there are other possibilities to calculate the interval rate based on a different + # weight setting. + if(fill.na==TRUE){ + idx <- which(is.na(SI_SPCA)==TRUE) + if(weight[1] == 0 | weight[2] == 0){ + speeds <- cbind((distance_xmin1[idx] * weight[1]) / (interval[idx]/60), + (distance_xplus1[idx] * weight[2]) / (interval[idx]/60)) + } else { + speeds <- cbind((distance_xmin1[idx] * weight[1]) / (difftime_xmin1[idx] /60), + (distance_xplus1[idx] * weight[2]) / (difftime_xplus1[idx]/60)) + } + #- Use of 0.5 if ifelse = NA, as counter to 2*... 
+ SI_SPCA[idx] <- apply(speeds,1,function(x){isfin <- is.finite(x); return(ifelse(all(isfin),mean(x),ifelse(any(isfin),x[isfin],NA)))}) + #SI_SPCA[idx] <- 2*(2* ifelse(is.na(speeds[,1])==TRUE,0.5,speeds[,1]) * ifelse(is.na(speeds[,2])==TRUE,0.5,speeds[,2]) / + # (ifelse(is.na(speeds[,1])==TRUE,0,speeds[,1]) + ifelse(is.na(speeds[,2])==TRUE,0,speeds[,2]))) + if(length(which(SI_SPCA[idx]==0))>0) SI_SPCA[idx][which(SI_SPCA[idx]==0)]<- NA + } + + return(SI_SPCA) + } else { + return(NA) + } + }) + return(unsplit(res,FT_REF))})) + tacsatp$SI_SPCA[which(tacsatp$FT_REF == "0")] <- NA + + } + if(level=="vessel"){ + #- Take the interval for weights c(0,1) and c(1,0) direct from tacsat + interval <- tacsatp$INTV + distance_xmin1 <- c(NA,distance(tacsatp$SI_LONG[2:nrow(tacsatp)], tacsatp$SI_LATI[2:nrow(tacsatp)], + tacsatp$SI_LONG[1:(nrow(tacsatp)-1)], tacsatp$SI_LATI[1:(nrow(tacsatp)-1)])) + distance_xplus1 <- c(distance_xmin1[-1],NA) + #- Recalculate the interval rate for either outer averages + difftime_xmin1 <- intervalTacsat(tacsatp,level="vessel",weight=c(1,0),fill.na=fill.na)$INTV + difftime_xplus1 <- intervalTacsat(tacsatp,level="vessel",weight=c(0,1),fill.na=fill.na)$INTV + if(any(weight == 0)){ + if(weight[1] == 0) SI_SPCA <- distance_xplus1 / (interval/60) + if(weight[2] == 0) SI_SPCA <- distance_xmin1 / (interval/60) + } else { + SI_SPCA <- 2*(2* ((distance_xmin1 * weight[1]) / (difftime_xmin1/60)) * (distance_xplus1 * weight[2] / (difftime_xplus1/60)) / + (((distance_xmin1 * weight[1]) / (difftime_xmin1/60)) + (distance_xplus1 * weight[2] / (difftime_xplus1/60)))) + } + #- If INTV equals NA, then check if there are other possibilities to calculate the interval rate based on a different + # weight setting. 
+ if(fill.na==TRUE){ + idx <- which(is.na(SI_SPCA)==TRUE) + if(weight[1] == 0 | weight[2] == 0){ + speeds <- cbind((distance_xmin1[idx] * weight[1]) / (interval[idx]/60), + (distance_xplus1[idx] * weight[2]) / (interval[idx]/60)) + } else { + speeds <- cbind((distance_xmin1[idx] * weight[1]) / (difftime_xmin1[idx] /60), + (distance_xplus1[idx] * weight[2]) / (difftime_xplus1[idx]/60)) + } + #- Use of 0.5 if ifelse = NA, as counter to 2*... + SI_SPCA[idx] <- apply(speeds,1,function(x){isfin <- is.finite(x); return(ifelse(all(isfin),mean(x),ifelse(any(isfin),x[isfin],NA)))}) + #SI_SPCA[idx] <- 2*(2* ifelse(is.na(speeds[,1])==TRUE,0.5,speeds[,1]) * ifelse(is.na(speeds[,2])==TRUE,0.5,speeds[,2]) / + # (ifelse(is.na(speeds[,1])==TRUE,0,speeds[,1]) + ifelse(is.na(speeds[,2])==TRUE,0,speeds[,2])))) + if(length(which(SI_SPCA[idx]==0))>0) SI_SPCA[idx][which(SI_SPCA[idx]==0)]<- NA + } + tacsatp$SI_SPCA <- SI_SPCA + + vessels <- unique(tacsatp$VE_REF) + first.vessels <- unlist(lapply(as.list(vessels),function(x){which(tacsatp$VE_REF==x)[1]})) + last.vessels <- unlist(lapply(as.list(vessels),function(x){rev(which(tacsatp$VE_REF==x))[1]})) + if(weight[1] != 0) tacsatp$SI_SPCA[first.vessels] <- NA + if(weight[2] != 0) tacsatp$SI_SPCA[last.vessels] <- NA + if(fill.na==TRUE) tacsatp$SI_SPCA[first.vessels] <- distance_xplus1[first.vessels] / (interval[first.vessels]/60) + if(fill.na==TRUE) tacsatp$SI_SPCA[last.vessels] <- distance_xmin1[last.vessels] / (interval[last.vessels]/60) + + } + idx <- which(colnames(tacsatp) %in% c("INTV")) + #- Convert from km/h to knots / nautical miles an hour + tacsatp$SI_SPCA <- tacsatp$SI_SPCA / 1.852 +return(if(length(idx)>0){tacsatp[,-idx]}else{tacsatp})} diff --git a/vmstools/R/clipObs2Tacsat.r b/vmstools/R/clipObs2Tacsat.r index d4f8704..1924bc6 100644 --- a/vmstools/R/clipObs2Tacsat.r +++ b/vmstools/R/clipObs2Tacsat.r @@ -1,196 +1,251 @@ -clipObs2Tacsat <- function(tacsat, #The tacsat dataset - obs, #The observation dataset - method="grid", 
#The method used, on 'grid' or 'euclidean' distance - control.grid=list(spatGrid=NULL,resx=NULL,resy=NULL,gridBbox="obs"), #gridBbox: whether bounding box should come from tacsat or observations - control.euclidean=list(threshold=NULL), #all.t = all.tacsat - temporalRange=NULL,#The range in which tacsat records may deviate from the observation time stamp - all.t=FALSE, - rowSize=1000 - ){ - -tacsat <- sortTacsat(tacsat) -obs <- sortTacsat(obs) - -#- If you want to get the full tacsat dataset back, make a copy of it first -if(all.t){ - tacsat$ID <- 1:nrow(tacsat) - tacsatOrig <- tacsat -} - -if(!"SI_DATIM" %in% colnames(tacsat)) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") -if(!"SI_DATIM" %in% colnames(obs)) obs$SI_DATIM <- as.POSIXct(paste(obs$SI_DATE, obs$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") - -#- Subset tacsat that can never have match with obs because of temporal or space ranges -if(method == "euclidean" & is.null(control.euclidean$threshold)==FALSE){ - rix <- km2Degree(range(obs$SI_LONG,na.rm=TRUE)[1],range(obs$SI_LATI,na.rm=TRUE)[1],control.euclidean$threshold) - minXobs <- range(obs$SI_LONG,na.rm=TRUE)[1] - rix - raiy <- control.euclidean$threshold/111.1949 - minYobs <- range(obs$SI_LATI,na.rm=TRUE)[1] - raiy - rax <- km2Degree(range(obs$SI_LONG,na.rm=TRUE)[2],range(obs$SI_LATI,na.rm=TRUE)[2],control.euclidean$threshold) - maxXobs <- range(obs$SI_LONG,na.rm=TRUE)[2] + rax - maxYobs <- range(obs$SI_LATI,na.rm=TRUE)[2] + raiy - tacsat <- subset(tacsat,SI_LONG >= minXobs & SI_LONG <= maxXobs & SI_LATI >= minYobs & SI_LATI <= maxYobs) -} -if(method == "grid"){ - minXobs <- range(obs$SI_LONG,na.rm=TRUE)[1] - control.grid$resx - maxXobs <- range(obs$SI_LONG,na.rm=TRUE)[2] + control.grid$resx - minYobs <- range(obs$SI_LATI,na.rm=TRUE)[1] - control.grid$resy - maxYobs <- range(obs$SI_LATI,na.rm=TRUE)[2] + control.grid$resy - tacsat <- subset(tacsat,SI_LONG >= minXobs & SI_LONG <= 
maxXobs & SI_LATI >= minYobs & SI_LATI <= maxYobs) -} - -if(is.null(temporalRange)==FALSE){ - minTobs <- range(obs$SI_DATIM,na.rm=TRUE)[1] + temporalRange[1]*60 - maxTobs <- range(obs$SI_DATIM,na.rm=TRUE)[2] + temporalRange[2]*60 - tacsat <- subset(tacsat,SI_DATIM >= minTobs & SI_DATIM <= maxTobs) -} -if(nrow(tacsat)==0) stop("Number of tacsat records that are within reach of obs dataset is zero") - - -#- Gridcell wanted, but not given yet, so create one -if(method == "grid" & is.null(control.grid$spatGrid) == TRUE){ - if(is.null(control.grid$resx) == TRUE | is.null(control.grid$resy) == TRUE) stop("Method selected needs resx and resy statements") - - xrangeO <- range(obs$SI_LONG,na.rm=TRUE); xrangeT <- range(tacsat$SI_LONG,na.rm=TRUE) - yrangeO <- range(obs$SI_LATI,na.rm=TRUE); yrangeT <- range(tacsat$SI_LATI,na.rm=TRUE) - - if(control.grid$gridBbox == "obs") spatGrid <- createGrid(xrangeO,yrangeO,control.grid$resx,control.grid$resx,type="SpatialGrid") - if(control.grid$gridBbox == "tacsat") spatGrid <- createGrid(xrangeT,yrangeT,control.grid$resy,control.grid$resy,type="SpatialGrid") - control.grid$spatGrid <- spatGrid -} - -#- Perform calculations on gridcell -if(method == "grid" & is.null(control.grid$spatGrid) == FALSE){ - sPDFObs <- SpatialPointsDataFrame(data.frame(cbind(obs$SI_LONG,obs$SI_LATI)),data=obs) - sPDFTac <- SpatialPointsDataFrame(data.frame(cbind(tacsat$SI_LONG,tacsat$SI_LATI)),data=tacsat) - resObs <- over(sPDFObs,spatGrid) - resTac <- over(sPDFTac,spatGrid) - - idxObs <- SpatialPoints(spatGrid)@coords[resObs,] - idxTac <- SpatialPoints(spatGrid)@coords[resTac,] - - obs$GR_LONG <- idxObs[,1]; obs$GR_LATI <- idxObs[,2] - tacsat$GR_LONG <- idxTac[,1]; tacsat$GR_LATI <- idxTac[,2] - - tacsat$GR_ID<- resTac; - obs$GR_ID <- resObs; - - - if(is.null(temporalRange)==FALSE){ - - res <- do.call(c,lapply(as.list(1:nrow(obs)),function(x){ res <- which(resTac == resObs[x]); - restime <- difftime(tacsat$SI_DATIM[res],obs$SI_DATIM[x],units="mins"); - #retrn 
<- ifelse(restime <= temporalRange[2] & restime >=temporalRange[1],resObs[x],NA) - retrn <- which(restime <= temporalRange[2] & restime >= temporalRange[1]) - return(res[retrn])})) - - - retrn <- list(obs,tacsat[res,]) - } else { - retrn <- list(obs,tacsat) - } -} - -#- Perform calculation by Euclidian distance -if(method == "euclidean"){ - - obs$GR_ID <- 1:nrow(obs) - - #- Create storage of tacsat records to save - totRes <- cbind(numeric(), numeric()) - - obsLon <- obs$SI_LONG - obsLat <- obs$SI_LATI - tacLon <- tacsat$SI_LONG - tacLat <- tacsat$SI_LATI - - #- Chop it up into pieces to speed up the calculations - nChunkObs <- ceiling(nrow(obs) /rowSize) - nChunkTac <- ceiling(nrow(tacsat) /rowSize) - for(iNO in 1:nChunkObs){ - if(iNO == nChunkObs){ - ox <- obsLon[(iNO*rowSize - rowSize + 1):length(obsLon)] - oy <- obsLat[(iNO*rowSize - rowSize + 1):length(obsLat)] - } else { - ox <- obsLon[(iNO*rowSize - rowSize + 1):(iNO*rowSize)] - oy <- obsLat[(iNO*rowSize - rowSize + 1):(iNO*rowSize)] - } - for(iNT in 1:nChunkTac){ - if(iNT == nChunkTac){ - tx <- tacLon[(iNT*rowSize - rowSize + 1):length(tacLon)] - ty <- tacLat[(iNT*rowSize - rowSize + 1):length(tacLat)] - } else { - tx <- tacLon[(iNT*rowSize - rowSize + 1):(iNT*rowSize)] - ty <- tacLat[(iNT*rowSize - rowSize + 1):(iNT*rowSize)] - } - - minXobs <- range(ox,na.rm=TRUE)[1] - rix - minYobs <- range(oy,na.rm=TRUE)[1] - raiy - maxXobs <- range(ox,na.rm=TRUE)[2] + rax - maxYobs <- range(oy,na.rm=TRUE)[2] + raiy - cont <- ifelse(length(which(tx >= minXobs & tx <= maxXobs & ty >= minYobs & ty <= maxYobs))>0,TRUE,FALSE) - - if(cont){ - - #- Check if the length of both sets are equal or not - if(iNO == nChunkObs | iNT == nChunkTac){ - - #- Get both the minimum distance, but also the index of the tacsat record associated - res <- do.call(rbind, - lapply(as.list(1:length((iNO*rowSize - rowSize +1):ifelse(iNO == nChunkObs,length(obsLon),(iNO*rowSize)))), - function(x){ - #- Get the row numbers of the full observation and 
tacsat set used here - obsRows <- (iNO*rowSize - rowSize +1):ifelse(iNO == nChunkObs,length(obsLon),(iNO*rowSize)) - if(length(tx)= temporalRange[1]) - } else { retrn <- 1:length(restime) } - if(length(tacRows[idx[retrn]])>0){ toReturn <- cbind(tacRows[idx[retrn]],obs$GR_ID[obsRows[x]]) - } else { toReturn <- cbind(NA,NA) } - - return(toReturn)})) - - - - totRes <- rbind(totRes,na.omit(res)) - } - if(iNO != nChunkObs & iNT != nChunkTac){ - - obsRows <- ((iNO*rowSize - rowSize +1):(iNO*rowSize)) - tacRows <- (((iNT*rowSize - rowSize + 1):(iNT*rowSize))) - res <- outer(1:rowSize,1:rowSize,FUN= - function(x,y){ - distObsTac <- distance(ox[x],oy[x],tx[y],ty[y]) - return(distObsTac)}) - - idx <- apply(res,2,function(x){return(x <= control.euclidean$threshold)}) - idx <- which(idx == TRUE,arr.ind=TRUE) - restime <- difftime(tacsat$SI_DATIM[tacRows[idx[,2]]],obs$SI_DATIM[obsRows[idx[,1]]],units="mins") - - if(is.null(temporalRange)==FALSE){ retrn <- which(restime <= temporalRange[2] & restime >= temporalRange[1]) - } else { retrn <- 1:length(restime) } - if(length(retrn)>0){ - res <- cbind(tacRows[idx[retrn,2]],obs$GR_ID[obsRows[idx[retrn,1]]]) - totRes <- rbind(totRes,na.omit(res)) - } - } - } - - }#End iNT loop - }#End iNO loop - tacsat$GR_ID <- NA - dubTacsat <- tacsat[totRes[,1],] - dubTacsat$GR_ID <- totRes[,2] - - - retrn <- list(obs,dubTacsat) -}#End method euclidean - -if(all.t) retrn[[2]] <- merge(retrn[[2]],tacsatOrig,by=colnames(tacsatOrig),all=TRUE) - -return(retrn)} +#' Find tacsat records close to observation data records +#' +#' Find the tacsat records that are close in distance or on a defined grid to +#' the observation data positions in both time and space +#' +#' Returned is a list with the first element being the observation dataset with +#' an GR_ID identifier that matches the GR_ID from the matching tacsat records. +#' The second element is the tacsat records with the same GR_IDs. 
+#' +#' @param tacsat Dataframe of vessel VMS in tacsat format +#' @param obs Dataframe also in tacsat format, but with observation data (GPS +#' and Date+Time) +#' @param method 'grid' or 'euclidean'. Match tacsat records in same gridcell +#' (method = "grid") or only x km from observed datapoint (method = +#' "euclidean") +#' @param control.grid If method = "grid", control.grid is a list with +#' specification of the spatialGrid (spatGrid, of class "SpatialGrid") or the +#' resolution in x and y values where the outermargins can be given by the +#' tacsat dataset (gridBbox="tacsat") or the observation dataset +#' (gridBbox="obs"). +#' +#' Example: control.grid=list(spatGrid = NULL, resx = c(0.5,1), resy = NULL, +#' gridBbox = "obs") +#' @param control.euclidean If method = "euclidean" then a maximum distance the +#' tacsat position may deviate from the observed datapoint needs to be given +#' (in km). +#' @param temporalRange The range in which tacsat records may deviate from the +#' observation time stamp (in minutes) +#' @param all.t If you want to return all tacsat records, all.t=T (return = +#' list, first element = observation set, second = matching tacsat), else F +#' second = all tacsat +#' @param rowSize To speed up calculations, define maximum rowSize (default = +#' 1000) +#' @author Niels T. 
Hintzen +#' @references EU Lot 2 project +#' @examples +#' +#' data(tacsat) +#' tacsat <- sortTacsat(tacsat) +#' +#' obs <- tacsat[round(seq(1,nrow(tacsat),length.out=30)),] +#' obs <- obs[,c(1,2,3,4,5,6)] +#' obs$OB_TYP <- "Benthos" +#' colnames(obs) <- c("OB_COU","OB_REF","SI_LATI","SI_LONG","SI_DATE","SI_TIME","OB_TYP") +#' newTime <- obs$SI_DATIM - runif(30,-60*20,60*20) +#' obs$SI_LATI <- jitter(obs$SI_LATI,factor=0.25) +#' obs$SI_LONG <- jitter(obs$SI_LONG,factor=0.5) +#' +#' +#' a <- clipObs2Tacsat(tacsat,obs,method="grid",control.grid=list(resx=0.1,resy=0.05, +#' gridBbox="obs"),temporalRange=c(-30,120),all.t=FALSE) +#' a <- clipObs2Tacsat(tacsat,obs,method="euclidean",control.euclidean=list(threshold=1), +#' temporalRange=c(-1e10,-1) ,all.t=FALSE) +#' +#' @export clipObs2Tacsat +clipObs2Tacsat <- function(tacsat, #The tacsat dataset + obs, #The observation dataset + method="grid", #The method used, on 'grid' or 'euclidean' distance + control.grid=list(spatGrid=NULL,resx=NULL,resy=NULL,gridBbox="obs"), #gridBbox: whether bounding box should come from tacsat or observations + control.euclidean=list(threshold=NULL), #all.t = all.tacsat + temporalRange=NULL,#The range in which tacsat records may deviate from the observation time stamp + all.t=FALSE, + rowSize=1000 + ){ + +tacsat <- sortTacsat(tacsat) +obs <- sortTacsat(obs) + +#- If you want to get the full tacsat dataset back, make a copy of it first +if(all.t){ + tacsat$ID <- 1:nrow(tacsat) + tacsatOrig <- tacsat +} + +if(!"SI_DATIM" %in% colnames(tacsat)) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") +if(!"SI_DATIM" %in% colnames(obs)) obs$SI_DATIM <- as.POSIXct(paste(obs$SI_DATE, obs$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") + +#- Subset tacsat that can never have match with obs because of temporal or space ranges +if(method == "euclidean" & is.null(control.euclidean$threshold)==FALSE){ + rix <- 
km2Degree(range(obs$SI_LONG,na.rm=TRUE)[1],range(obs$SI_LATI,na.rm=TRUE)[1],control.euclidean$threshold) + minXobs <- range(obs$SI_LONG,na.rm=TRUE)[1] - rix + raiy <- control.euclidean$threshold/111.1949 + minYobs <- range(obs$SI_LATI,na.rm=TRUE)[1] - raiy + rax <- km2Degree(range(obs$SI_LONG,na.rm=TRUE)[2],range(obs$SI_LATI,na.rm=TRUE)[2],control.euclidean$threshold) + maxXobs <- range(obs$SI_LONG,na.rm=TRUE)[2] + rax + maxYobs <- range(obs$SI_LATI,na.rm=TRUE)[2] + raiy + tacsat <- subset(tacsat,SI_LONG >= minXobs & SI_LONG <= maxXobs & SI_LATI >= minYobs & SI_LATI <= maxYobs) +} +if(method == "grid"){ + minXobs <- range(obs$SI_LONG,na.rm=TRUE)[1] - control.grid$resx + maxXobs <- range(obs$SI_LONG,na.rm=TRUE)[2] + control.grid$resx + minYobs <- range(obs$SI_LATI,na.rm=TRUE)[1] - control.grid$resy + maxYobs <- range(obs$SI_LATI,na.rm=TRUE)[2] + control.grid$resy + tacsat <- subset(tacsat,SI_LONG >= minXobs & SI_LONG <= maxXobs & SI_LATI >= minYobs & SI_LATI <= maxYobs) +} + +if(is.null(temporalRange)==FALSE){ + minTobs <- range(obs$SI_DATIM,na.rm=TRUE)[1] + temporalRange[1]*60 + maxTobs <- range(obs$SI_DATIM,na.rm=TRUE)[2] + temporalRange[2]*60 + tacsat <- subset(tacsat,SI_DATIM >= minTobs & SI_DATIM <= maxTobs) +} +if(nrow(tacsat)==0) stop("Number of tacsat records that are within reach of obs dataset is zero") + + +#- Gridcell wanted, but not given yet, so create one +if(method == "grid" & is.null(control.grid$spatGrid) == TRUE){ + if(is.null(control.grid$resx) == TRUE | is.null(control.grid$resy) == TRUE) stop("Method selected needs resx and resy statements") + + xrangeO <- range(obs$SI_LONG,na.rm=TRUE); xrangeT <- range(tacsat$SI_LONG,na.rm=TRUE) + yrangeO <- range(obs$SI_LATI,na.rm=TRUE); yrangeT <- range(tacsat$SI_LATI,na.rm=TRUE) + + if(control.grid$gridBbox == "obs") spatGrid <- createGrid(xrangeO,yrangeO,control.grid$resx,control.grid$resx,type="SpatialGrid") + if(control.grid$gridBbox == "tacsat") spatGrid <- 
createGrid(xrangeT,yrangeT,control.grid$resy,control.grid$resy,type="SpatialGrid") + control.grid$spatGrid <- spatGrid +} + +#- Perform calculations on gridcell +if(method == "grid" & is.null(control.grid$spatGrid) == FALSE){ + sPDFObs <- SpatialPointsDataFrame(data.frame(cbind(obs$SI_LONG,obs$SI_LATI)),data=obs) + sPDFTac <- SpatialPointsDataFrame(data.frame(cbind(tacsat$SI_LONG,tacsat$SI_LATI)),data=tacsat) + resObs <- over(sPDFObs,spatGrid) + resTac <- over(sPDFTac,spatGrid) + + idxObs <- SpatialPoints(spatGrid)@coords[resObs,] + idxTac <- SpatialPoints(spatGrid)@coords[resTac,] + + obs$GR_LONG <- idxObs[,1]; obs$GR_LATI <- idxObs[,2] + tacsat$GR_LONG <- idxTac[,1]; tacsat$GR_LATI <- idxTac[,2] + + tacsat$GR_ID<- resTac; + obs$GR_ID <- resObs; + + + if(is.null(temporalRange)==FALSE){ + + res <- do.call(c,lapply(as.list(1:nrow(obs)),function(x){ res <- which(resTac == resObs[x]); + restime <- difftime(tacsat$SI_DATIM[res],obs$SI_DATIM[x],units="mins"); + #retrn <- ifelse(restime <= temporalRange[2] & restime >=temporalRange[1],resObs[x],NA) + retrn <- which(restime <= temporalRange[2] & restime >= temporalRange[1]) + return(res[retrn])})) + + + retrn <- list(obs,tacsat[res,]) + } else { + retrn <- list(obs,tacsat) + } +} + +#- Perform calculation by Euclidian distance +if(method == "euclidean"){ + + obs$GR_ID <- 1:nrow(obs) + + #- Create storage of tacsat records to save + totRes <- cbind(numeric(), numeric()) + + obsLon <- obs$SI_LONG + obsLat <- obs$SI_LATI + tacLon <- tacsat$SI_LONG + tacLat <- tacsat$SI_LATI + + #- Chop it up into pieces to speed up the calculations + nChunkObs <- ceiling(nrow(obs) /rowSize) + nChunkTac <- ceiling(nrow(tacsat) /rowSize) + for(iNO in 1:nChunkObs){ + if(iNO == nChunkObs){ + ox <- obsLon[(iNO*rowSize - rowSize + 1):length(obsLon)] + oy <- obsLat[(iNO*rowSize - rowSize + 1):length(obsLat)] + } else { + ox <- obsLon[(iNO*rowSize - rowSize + 1):(iNO*rowSize)] + oy <- obsLat[(iNO*rowSize - rowSize + 1):(iNO*rowSize)] + } + for(iNT 
in 1:nChunkTac){ + if(iNT == nChunkTac){ + tx <- tacLon[(iNT*rowSize - rowSize + 1):length(tacLon)] + ty <- tacLat[(iNT*rowSize - rowSize + 1):length(tacLat)] + } else { + tx <- tacLon[(iNT*rowSize - rowSize + 1):(iNT*rowSize)] + ty <- tacLat[(iNT*rowSize - rowSize + 1):(iNT*rowSize)] + } + + minXobs <- range(ox,na.rm=TRUE)[1] - rix + minYobs <- range(oy,na.rm=TRUE)[1] - raiy + maxXobs <- range(ox,na.rm=TRUE)[2] + rax + maxYobs <- range(oy,na.rm=TRUE)[2] + raiy + cont <- ifelse(length(which(tx >= minXobs & tx <= maxXobs & ty >= minYobs & ty <= maxYobs))>0,TRUE,FALSE) + + if(cont){ + + #- Check if the length of both sets are equal or not + if(iNO == nChunkObs | iNT == nChunkTac){ + + #- Get both the minimum distance, but also the index of the tacsat record associated + res <- do.call(rbind, + lapply(as.list(1:length((iNO*rowSize - rowSize +1):ifelse(iNO == nChunkObs,length(obsLon),(iNO*rowSize)))), + function(x){ + #- Get the row numbers of the full observation and tacsat set used here + obsRows <- (iNO*rowSize - rowSize +1):ifelse(iNO == nChunkObs,length(obsLon),(iNO*rowSize)) + if(length(tx)= temporalRange[1]) + } else { retrn <- 1:length(restime) } + if(length(tacRows[idx[retrn]])>0){ toReturn <- cbind(tacRows[idx[retrn]],obs$GR_ID[obsRows[x]]) + } else { toReturn <- cbind(NA,NA) } + + return(toReturn)})) + + + + totRes <- rbind(totRes,na.omit(res)) + } + if(iNO != nChunkObs & iNT != nChunkTac){ + + obsRows <- ((iNO*rowSize - rowSize +1):(iNO*rowSize)) + tacRows <- (((iNT*rowSize - rowSize + 1):(iNT*rowSize))) + res <- outer(1:rowSize,1:rowSize,FUN= + function(x,y){ + distObsTac <- distance(ox[x],oy[x],tx[y],ty[y]) + return(distObsTac)}) + + idx <- apply(res,2,function(x){return(x <= control.euclidean$threshold)}) + idx <- which(idx == TRUE,arr.ind=TRUE) + restime <- difftime(tacsat$SI_DATIM[tacRows[idx[,2]]],obs$SI_DATIM[obsRows[idx[,1]]],units="mins") + + if(is.null(temporalRange)==FALSE){ retrn <- which(restime <= temporalRange[2] & restime >= 
temporalRange[1]) + } else { retrn <- 1:length(restime) } + if(length(retrn)>0){ + res <- cbind(tacRows[idx[retrn,2]],obs$GR_ID[obsRows[idx[retrn,1]]]) + totRes <- rbind(totRes,na.omit(res)) + } + } + } + + }#End iNT loop + }#End iNO loop + tacsat$GR_ID <- NA + dubTacsat <- tacsat[totRes[,1],] + dubTacsat$GR_ID <- totRes[,2] + + + retrn <- list(obs,dubTacsat) +}#End method euclidean + +if(all.t) retrn[[2]] <- merge(retrn[[2]],tacsatOrig,by=colnames(tacsatOrig),all=TRUE) + +return(retrn)} diff --git a/vmstools/R/compareToOrdination.r b/vmstools/R/compareToOrdination.r index 1f56597..b61b87f 100644 --- a/vmstools/R/compareToOrdination.r +++ b/vmstools/R/compareToOrdination.r @@ -1,236 +1,367 @@ -########################################################################### -# DETERMINE M�TIERS LEVEL 7 AND 5 WITH TWO ORDINATION METHODS # -# (FIRST SPECIES IN CATCH AND FIRST GROUP OF SPECIES IN CATCH) # -# # -# DETERMINE M�TIERS LEVEL5 FROM M�TIERS LEVEL7 FOUND WITH THE # -# MULTIVARIATE CLASSIFICATION (DETERMINING THE TARGET SPECIES # -# FOR EACH CLUSTER) # -# # -# COMPARE THE M�TIERS FOUND WITH THE ORDINATION METHODS AND # -# WITH THE MULTIVARIATE CLASSIFICATION # -########################################################################### - -compareToOrdination=function(dat, Step2, clusters, targetSpecies){ - - # Load the table linking 3A-CODE (FAO CODE of species) to the species assemblage (level 5). - data(correspLevel7to5) - - # Load the table linking mixed metiers (composed by 2 simple metiers) to their official code of mixed metiers level 5 (FAO - 3 characters). 
- data(correspMixedMetier) - - - ########################## - # FIRST SPECIES IN CATCH # - ########################## - - dat=as.matrix(dat[,2:ncol(dat)]) - p=ncol(dat) - firstSp=list() - nbLogFirstSp=list() - - # Determine the first species for each logevent (the catchest species of the logevent) - firstSp=apply(dat,1,function(x) names(which.max(x))) - metiersFirstSpeciesL7=firstSp - - # Calculate the number of logevent by first species (number of logevents by cluster) - nbLogFirstSp=sapply(unique(unlist(firstSp)),function(x) table(firstSp==x)[2]) - - names(nbLogFirstSp)=substr(names(nbLogFirstSp),1,3) - - # Number of logevents linked to each species from the method "first species in catch" - png("Number of logevents by ordination metier level7.png", width = 1200, height = 800) - barplot(nbLogFirstSp[order(nbLogFirstSp,decreasing=TRUE)], main="Number of logevents by ordination metier level7", xlab="Main species", ylab="Number of Logevents", las=3) - dev.off() - # idem without the species with the biggest number of logevents - png(paste("Number of logevents by ordination metier level7 without ",names(nbLogFirstSp[order(nbLogFirstSp,decreasing=TRUE)])[1],".png",sep=""), width = 1200, height = 800) - barplot(nbLogFirstSp[order(nbLogFirstSp,decreasing=TRUE)][-1], main=paste("Number of logevents by ordination metier level7 without",names(nbLogFirstSp[order(nbLogFirstSp,decreasing=TRUE)])[1],sep=" "), xlab="Main species", ylab="Number of Logevents", las=3) - dev.off() - - - # Names of species from the ordination method - nomEspOrdi=names(nbLogFirstSp) - nbEspOrdi=length(nomEspOrdi) - nomEspOrdi=nomEspOrdi[order(nomEspOrdi,decreasing=FALSE)] - # Initialization - numEspOrdiClarai=numeric() - nomEspOrdiPlot=character() - nbLogEspOrdiPlot=numeric() - nbClust=length(unique(clusters)) - - # For each cluster Clara - for(i in 1:nbClust){ - # Names of species from ordination method linked to the logevents of cluster Clara i - 
nomEspOrdiClarai=unique(unlist(firstSp[which(clusters==i)])) - # their number - numEspOrdiClarai=sapply(nomEspOrdiClarai, function(x) which(nomEspOrdi==x)) - - # Names of species to plot in the barplot of cluster i - nomEspOrdiPloti=rep("",nbEspOrdi) - nomEspOrdiPloti[numEspOrdiClarai]=nomEspOrdiClarai - nomEspOrdiPlot=rbind(nomEspOrdiPlot,nomEspOrdiPloti) - - # for the cluster i, number of logevents of cluster i linked to each species from ordination method - nbLogEspOrdiPloti=rep(0,nbEspOrdi) - nbLogEspOrdiPloti[numEspOrdiClarai]=sapply(nomEspOrdiClarai,function(x) rev(table(firstSp[which(clusters==i)]==x))[1]) - nbLogEspOrdiPlot=rbind(nbLogEspOrdiPlot,nbLogEspOrdiPloti) - } - colnames(nbLogEspOrdiPlot)=names(nbLogFirstSp)[order(names(nbLogFirstSp))] - rownames(nbLogEspOrdiPlot)=paste("nbLogFirstSpCluster",seq=1:nbClust,sep="") - - # Graphics - png("Ordination_metiersL7_by_cluster.png", width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(i in 1:nbClust){ - op2 <- par(las=2) - barplot(nbLogEspOrdiPlot[i,],names.arg=nomEspOrdiPlot[i,], las=3) - par(op2) - mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Number of logevents linked to each metier from the ordination method 'first species' by cluster","\n","\n",sep="")) - dev.off() - - # Projections on the first factorial plans - nomEspOrdi=substr(names(nbLogFirstSp),1,3) - espOrdi=matrix() - espOrdi=rbind(names(nbLogFirstSp),seq(1:length(nbLogFirstSp))) - numFirstSp=firstSp - for(i in 1:length(nbLogFirstSp)){ - numFirstSp[which(firstSp==nomEspOrdi[i])]=espOrdi[2,which(espOrdi[1,]==nomEspOrdi[i])] - } - - # Projections on the first factorial plans - png("Ordination_Projections.png", width = 1200, height = 800) - op <- par(mfrow=c(2,3)) - plot(Step2[,1], Step2[,2], pch=21, bg=rainbow(length(nbLogFirstSp))[as.numeric(numFirstSp)], main="Projection of the classification by ordination on the factorial plan 
1-2", xlab="axis 1", ylab="axis 2") - plot(Step2[,2], Step2[,3], pch=21, bg=rainbow(length(nbLogFirstSp))[as.numeric(numFirstSp)], main="Projection of the classification by ordination on the factorial plan 2-3", xlab="axis 2", ylab="axis 3") - plot(Step2[,1], Step2[,3], pch=21, bg=rainbow(length(nbLogFirstSp))[as.numeric(numFirstSp)], main="Projection of the classification by ordination on the factorial plan 1-3", xlab="axis 1", ylab="axis 3") - plot(Step2[,1], Step2[,4], pch=21, bg=rainbow(length(nbLogFirstSp))[as.numeric(numFirstSp)], main="Projection of the classification by ordination on the factorial plan 1-4", xlab="axis 1", ylab="axis 4") - plot(Step2[,2], Step2[,4], pch=21, bg=rainbow(length(nbLogFirstSp))[as.numeric(numFirstSp)], main="Projection of the classification by ordination on the factorial plan 2-4", xlab="axis 2", ylab="axis 4") - plot(Step2[,3], Step2[,4], pch=21, bg=rainbow(length(nbLogFirstSp))[as.numeric(numFirstSp)], main="Projection of the classification by ordination on the factorial plan 3-4", xlab="axis 3", ylab="axis 4") - par(op) - dev.off() - - - # Determine the metier level 5 from the metier level 7 based on the first species in catch (logevent by logevent) - print("Please, be patient...") - metiersFirstSpeciesL5=lapply(as.list(firstSp), function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0){print(paste(x," : unknown species, classed in \'FIF\' group",sep=""));"FIF"} - else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"DCF_species_level5_COD"]) - metiersFirstSpeciesL5=unlist(lapply(metiersFirstSpeciesL5, function(x) as.character(x))) - - - - - ################################### - # FIRST GROUP OF SPECIES IN CATCH # - ################################### - - # Determine the metier of each logevent thanks to the first group of species in catch (level 5) of the logevent - datL5=dat - groupColSpecies=lapply(as.list(colnames(datL5)), function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0){print(paste(x," 
: unknown species, classed in \'FIF\' group",sep=""));"FIF"} - else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"DCF_species_level5_COD"]) - groupColSpecies=unlist(lapply(groupColSpecies, function(x) as.character(x))) - colnames(datL5)=groupColSpecies - nbC5=length(unique(colnames(datL5))) - tab=matrix(NA,nrow=nrow(datL5),ncol=nbC5) - colnames(tab)=unique(colnames(datL5)) - for(i in 1:nbC5){ - # Sub-data containing only one metier level 5. - subdati=datL5[,which(colnames(datL5)==(unique(colnames(datL5))[i]))] - if(length(which(colnames(datL5)==(unique(colnames(datL5))[i])))==1) tab[,i]=subdati - else - tab[,i]=apply(subdati,1,sum) - } - # Metiers (level 5) of each logevent found thanks to the first group of species in catch (level 5) - metiersFirstGroupL5=unique(colnames(datL5))[apply(tab,1,which.max)] - - - - - - - ############################################################### - # LEVELS 7 AND 5 FOR METIERS FROM MULTIVARIATE CLASSIFICATION # - ############################################################### - - # List of metiers (level 5.7) - listMetiersLevel57=list() - for (i in 1:nbClust){ - metiersClusteri=lapply(targetSpecies[[i]], function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0){print(paste(x," : unknown species, classed in \'FIF\' group",sep=""));"FIF"} - else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"DCF_species_level5_COD"]) - metiersClusteri=as.character(unique(unlist(metiersClusteri))) - metiersClusteri=paste(unlist(strsplit(metiersClusteri," ")),collapse=" ") - listMetiersLevel57[[i]]=metiersClusteri - } - - # Metiers (level 5.7) of each logevent found thanks to the getMetierClusters method - #(where mixed metiers labels are the aggregation of several simple metiers labels). - metiersClustersL5=unlist(sapply(clusters,function(x) listMetiersLevel57[x])) - - - # Correspondence of mixed metiers (composed by 2 simple metiers) to their official code of mixed metiers level 5 (FAO - 3 characters). 
- listMixedMetiersLevel57=lapply(listMetiersLevel57, function(x) if(nchar(x)!=3 & is.element(x,correspMixedMetier[,1])){correspMixedMetier[which(correspMixedMetier[,"combMetiersL5"]==x),"mixedMetierL5"]} - else x ) - - # Metiers (level 5.7) of each logevent found thanks to the getMetierClusters method (with official labels for mixed metiers). - mixedMetiersClustersL5=unlist(sapply(clusters,function(x) listMixedMetiersLevel57[x])) - - - - - - ################# - # COMPARISONS # - ################# - - # Compare the metiers 'Clusters' (from getMetierClusters) (level 7) vs the metier 'First Species' in catch (level 7) - compClustersL7vsFirstSpeciesL7=table(clusters,metiersFirstSpeciesL7) - rownames(compClustersL7vsFirstSpeciesL7)=paste("Clust",seq=1:nbClust,sep="") - - # Compare the metiers 'Clusters' (level 5.7) vs the metiers 'First Species' (level 5) - clustStep3L5=clusters - compClustersL5vsFirstSpeciesL5=table(clustStep3L5,metiersFirstSpeciesL5) - rownames(compClustersL5vsFirstSpeciesL5)=unlist(listMixedMetiersLevel57) - - # Compare the metiers 'Clusters' (level 5.7) vs the metiers 'First Group' (level 5) - #compL5ClustStep3VsGroup=table(clustersStep3L5,metiersFirstGroupL5) !! pb regroupement des clusters qui correspondent au m�me groupe d'esp�ce niveau 5. 
- clustStep3L5=clusters - compClustersL5vsFirstGroupL5=table(clustStep3L5,metiersFirstGroupL5) - rownames(compClustersL5vsFirstGroupL5)=unlist(listMixedMetiersLevel57) - - # Compare the metiers 'First Species' (level 5) vs the metiers 'First Group' (level 5) - compFirstSpeciesL5vsFirstGroupL5=table(metiersFirstSpeciesL5,metiersFirstGroupL5) - - - # Less attractive - # Compare the metiers 'Clusters' (level 5.7) vs the metiers 'Clusters' (level 7) - clustStep3L7=clusters - compClustersL5vsClustersL7=table(mixedMetiersClustersL5,clustStep3L7) - colnames(compClustersL5vsClustersL7)=paste("Clust",seq=1:nbClust) - - - - # Create csv tables - dfTables=data.frame() - dfTables=c("compClustersL7vsFirstSpeciesL7","compClustersL5vsFirstSpeciesL5", - "compClustersL5vsFirstGroupL5","compFirstSpeciesL5vsFirstGroupL5","compClustersL5vsClustersL7") - write.table(dfTables[1],file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) - write.table(compClustersL7vsFirstSpeciesL7,file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) - write.table(dfTables[2],file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) - write.table(compClustersL5vsFirstSpeciesL5,file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) - write.table(dfTables[3],file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) - write.table(compClustersL5vsFirstGroupL5,file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) - write.table(dfTables[4],file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) - write.table(compFirstSpeciesL5vsFirstGroupL5,file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) - write.table(dfTables[5],file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) - write.table(compClustersL5vsClustersL7,file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) - - print("Done.") - - return(list(nbLogFirstSp=nbLogFirstSp, - compClustersL7vsFirstSpeciesL7=compClustersL7vsFirstSpeciesL7, - compClustersL5vsFirstSpeciesL5=compClustersL5vsFirstSpeciesL5, - 
compClustersL5vsFirstGroupL5=compClustersL5vsFirstGroupL5, - compFirstSpeciesL5vsFirstGroupL5=compFirstSpeciesL5vsFirstGroupL5, - compClustersL5vsClustersL7=compClustersL5vsClustersL7)) -} \ No newline at end of file +########################################################################### +# DETERMINE MÉTIERS LEVEL 7 AND 5 WITH TWO ORDINATION METHODS # +# (FIRST SPECIES IN CATCH AND FIRST GROUP OF SPECIES IN CATCH) # +# # +# DETERMINE MÉTIERS LEVEL5 FROM MÉTIERS LEVEL7 FOUND WITH THE # +# MULTIVARIATE CLASSIFICATION (DETERMINING THE TARGET SPECIES # +# FOR EACH CLUSTER) # +# # +# COMPARE THE MÉTIERS FOUND WITH THE ORDINATION METHODS AND # +# WITH THE MULTIVARIATE CLASSIFICATION # +########################################################################### + + + +#' Compare the metiers found with the classification from clustering e.g. CLARA +#' against the metiers found by two simple alternative ordination methods: +#' 'first species' & 'first group'. +#' +#' This function allows one to compare the metiers found using the classification +#' from clustering against the metiers found by two alternative ordination +#' methods, 'first species' and 'first group'. +#' +#' The 'first species' method consists of characterizing the level 7 of each +#' logevent by the species with the highest catch. Then, the level 5 is +#' deduced from the target assemblage of species to which this species +#' belongs. +#' +#' The 'first group' method alternatively assigns a level 5 to each logevent +#' corresponding to the target assemblage having the highest catch volume. +#' +#' The classification from clustering defines level 7 metiers for which each +#' metier is characterized by one or several species, called target species, +#' unlike the metiers from the simpler ordination methods. Hence, mixed metiers +#' can be obtained in case that these species belong to different target +#' assemblages. 
+#' +#' This function highlights the differences when defining the metiers between the +#' classification from clustering and the simpler ordination methods, and +#' potentially demonstrates the higher power of the data clustering method in +#' obtaining exhaustive and accurately defined metiers. +#' +#' +#' @param dat a reduced data.frame from an eflalo format. It should contain +#' only the LE_ID (Logevent ID) variable as well as all species names in +#' columns, with raw catch data. It may be needed to sort out potential +#' error-prone rows (such as rows with only 0) prior to the analysis, and to +#' also replace NA values by 0. +#' @param Step2 numerical matrix with logevents as rows, and values to be used +#' for calculating distances between individuals as columns. This matrix is +#' produced at the step 2 of the metier analysis, output of the function +#' getTableAfterPCA(). In case a PCA is run, the selected axes will appear as +#' columns. If no PCA is run, the matrix will be the same as datSpecies +#' (produced at the step 1 of the metier analysis, using the function +#' extractTableMainSpecies()), with percentage values by species. +#' @param clusters the vector storing the cluster label of each logbook event. +#' This vector will be produced at the step 3 of the metier analysis, using the +#' function getMetierClusters(). +#' @param targetSpecies a list giving the target species by cluster. This list +#' will be produced at the step 3 of the metier analysis, using the function +#' getMetierClusters(). +#' @return The function returns a list with a number of tables on the +#' comparison of the three methods: \item{nbLogFirstSp}{An integer vector +#' giving the number of logevents allocated to each metier (level 7) defined by +#' the ordination method 'first species'. 
} +#' \item{compClustersL7vsFirstSpeciesL7}{A table giving the distribution of the +#' logevents of each metier (level 7) defined by the multivariate +#' classification in the metiers (level 7) defined by the ordination method +#' 'first species'. } \item{compClustersL5vsFirstSpeciesL5}{A table giving the +#' distribution of the logevents of each metier (level 5) defined by the +#' multivariate classification in the metiers (level 5) defined by the +#' ordination method 'first species'. } \item{compClustersL5vsFirstGroupL5}{A +#' table giving the distribution of the logevents of each metier (level 5) +#' defined by the multivariate classification in the metiers (level 5) defined +#' by the ordination method 'first group'. } +#' \item{compFirstSpeciesL5vsFirstGroupL5}{A table giving the distribution of +#' the logevents of each metier (level 5) defined by the ordination method +#' 'first species' in the metiers (level 5) defined by the ordination method +#' 'first group'. } \item{compClustersL5vsClustersL7}{A table giving the +#' distribution of the logevents of each metier (level 5) defined by the +#' multivariate classification in the metiers (level 7) defined by the +#' multivariate classification. } +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @seealso \code{\link{getEflaloMetierLevel7}}, +#' \code{\link{selectMainSpecies}}, \code{\link{extractTableMainSpecies}}, +#' \code{\link{getMetierClusters}}, \code{\link{getTableAfterPCA}}, +#' \code{\link{getMetierClusters}} +#' @references Development of tools for logbook and VMS data analysis. 
Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @examples +#' +#' +#' +#' +#' \dontrun{ +#' +#' data(eflalo) +#' +#' eflalo <- formatEflalo(eflalo) +#' +#' eflalo <- eflalo[eflalo$LE_GEAR=="OTB",] +#' +#' # note that output plots will be sent to getwd() +#' analysisName <- "metier_analysis_OTB" +#' +#' dat <- eflalo[,c("LE_ID",grep("EURO",colnames(eflalo),value=TRUE))] +#' names(dat)[-1] <- unlist(lapply(strsplit(names(dat[,-1]),"_"),function(x) x[[3]])) +#' +#' explo <- selectMainSpecies(dat, analysisName, RunHAC=TRUE, DiagFlag=FALSE) +#' #=> send the LE_ID and LE_KG_SP columns only +#' +#' Step1 <- extractTableMainSpecies(dat, explo$NamesMainSpeciesHAC, +#' paramTotal=95, paramLogevent=100) +#' #=> send the LE_ID and LE_KG_SP columns only +#' +#' rowNamesSave <- row.names(Step1) +#' row.names(Step1) <- 1:nrow(Step1) +#' +#' # Run a PCA +#' Step2 <- getTableAfterPCA(Step1, analysisName, +#' pcaYesNo="pca", criterion="70percents") +#' +#' row.names(Step1) <- rowNamesSave +#' row.names(Step2) <- rowNamesSave +#' +#' # Define a metier for each logevent running the CLARA algorithm +#' Step3 <- getMetierClusters(Step1, Step2, analysisName, +#' methMetier="clara", param1="euclidean", param2=NULL) +#' +#' # Compare the differences between the metiers defined by CLARA +#' # and the metiers defined by two simple ordination methods +#' compMetiers <- compareToOrdination(dat, Step2, +#' clusters=Step3$clusters$clustering, +#' targetSpecies=Step3$targetSpecies) +#' #=> send the LE_ID and LE_KG_SP columns only +#' +#' } +#' +#' +#' @export compareToOrdination +compareToOrdination=function(dat, Step2, clusters, targetSpecies){ + + # Load the table linking 3A-CODE (FAO CODE of species) to the species assemblage (level 5). + data(correspLevel7to5) + + # Load the table linking mixed metiers (composed by 2 simple metiers) to their official code of mixed metiers level 5 (FAO - 3 characters). 
+ data(correspMixedMetier) + + + ########################## + # FIRST SPECIES IN CATCH # + ########################## + + dat=as.matrix(dat[,2:ncol(dat)]) + p=ncol(dat) + firstSp=list() + nbLogFirstSp=list() + + # Determine the first species for each logevent (the catchest species of the logevent) + firstSp=apply(dat,1,function(x) names(which.max(x))) + metiersFirstSpeciesL7=firstSp + + # Calculate the number of logevent by first species (number of logevents by cluster) + nbLogFirstSp=sapply(unique(unlist(firstSp)),function(x) table(firstSp==x)[2]) + + names(nbLogFirstSp)=substr(names(nbLogFirstSp),1,3) + + # Number of logevents linked to each species from the method "first species in catch" + png("Number of logevents by ordination metier level7.png", width = 1200, height = 800) + barplot(nbLogFirstSp[order(nbLogFirstSp,decreasing=TRUE)], main="Number of logevents by ordination metier level7", xlab="Main species", ylab="Number of Logevents", las=3) + dev.off() + # idem without the species with the biggest number of logevents + png(paste("Number of logevents by ordination metier level7 without ",names(nbLogFirstSp[order(nbLogFirstSp,decreasing=TRUE)])[1],".png",sep=""), width = 1200, height = 800) + barplot(nbLogFirstSp[order(nbLogFirstSp,decreasing=TRUE)][-1], main=paste("Number of logevents by ordination metier level7 without",names(nbLogFirstSp[order(nbLogFirstSp,decreasing=TRUE)])[1],sep=" "), xlab="Main species", ylab="Number of Logevents", las=3) + dev.off() + + + # Names of species from the ordination method + nomEspOrdi=names(nbLogFirstSp) + nbEspOrdi=length(nomEspOrdi) + nomEspOrdi=nomEspOrdi[order(nomEspOrdi,decreasing=FALSE)] + # Initialization + numEspOrdiClarai=numeric() + nomEspOrdiPlot=character() + nbLogEspOrdiPlot=numeric() + nbClust=length(unique(clusters)) + + # For each cluster Clara + for(i in 1:nbClust){ + # Names of species from ordination method linked to the logevents of cluster Clara i + 
nomEspOrdiClarai=unique(unlist(firstSp[which(clusters==i)])) + # their number + numEspOrdiClarai=sapply(nomEspOrdiClarai, function(x) which(nomEspOrdi==x)) + + # Names of species to plot in the barplot of cluster i + nomEspOrdiPloti=rep("",nbEspOrdi) + nomEspOrdiPloti[numEspOrdiClarai]=nomEspOrdiClarai + nomEspOrdiPlot=rbind(nomEspOrdiPlot,nomEspOrdiPloti) + + # for the cluster i, number of logevents of cluster i linked to each species from ordination method + nbLogEspOrdiPloti=rep(0,nbEspOrdi) + nbLogEspOrdiPloti[numEspOrdiClarai]=sapply(nomEspOrdiClarai,function(x) rev(table(firstSp[which(clusters==i)]==x))[1]) + nbLogEspOrdiPlot=rbind(nbLogEspOrdiPlot,nbLogEspOrdiPloti) + } + colnames(nbLogEspOrdiPlot)=names(nbLogFirstSp)[order(names(nbLogFirstSp))] + rownames(nbLogEspOrdiPlot)=paste("nbLogFirstSpCluster",seq=1:nbClust,sep="") + + # Graphics + png("Ordination_metiersL7_by_cluster.png", width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(i in 1:nbClust){ + op2 <- par(las=2) + barplot(nbLogEspOrdiPlot[i,],names.arg=nomEspOrdiPlot[i,], las=3) + par(op2) + mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Number of logevents linked to each metier from the ordination method 'first species' by cluster","\n","\n",sep="")) + dev.off() + + # Projections on the first factorial plans + nomEspOrdi=substr(names(nbLogFirstSp),1,3) + espOrdi=matrix() + espOrdi=rbind(names(nbLogFirstSp),seq(1:length(nbLogFirstSp))) + numFirstSp=firstSp + for(i in 1:length(nbLogFirstSp)){ + numFirstSp[which(firstSp==nomEspOrdi[i])]=espOrdi[2,which(espOrdi[1,]==nomEspOrdi[i])] + } + + # Projections on the first factorial plans + png("Ordination_Projections.png", width = 1200, height = 800) + op <- par(mfrow=c(2,3)) + plot(Step2[,1], Step2[,2], pch=21, bg=rainbow(length(nbLogFirstSp))[as.numeric(numFirstSp)], main="Projection of the classification by ordination on the factorial plan 
1-2", xlab="axis 1", ylab="axis 2") + plot(Step2[,2], Step2[,3], pch=21, bg=rainbow(length(nbLogFirstSp))[as.numeric(numFirstSp)], main="Projection of the classification by ordination on the factorial plan 2-3", xlab="axis 2", ylab="axis 3") + plot(Step2[,1], Step2[,3], pch=21, bg=rainbow(length(nbLogFirstSp))[as.numeric(numFirstSp)], main="Projection of the classification by ordination on the factorial plan 1-3", xlab="axis 1", ylab="axis 3") + plot(Step2[,1], Step2[,4], pch=21, bg=rainbow(length(nbLogFirstSp))[as.numeric(numFirstSp)], main="Projection of the classification by ordination on the factorial plan 1-4", xlab="axis 1", ylab="axis 4") + plot(Step2[,2], Step2[,4], pch=21, bg=rainbow(length(nbLogFirstSp))[as.numeric(numFirstSp)], main="Projection of the classification by ordination on the factorial plan 2-4", xlab="axis 2", ylab="axis 4") + plot(Step2[,3], Step2[,4], pch=21, bg=rainbow(length(nbLogFirstSp))[as.numeric(numFirstSp)], main="Projection of the classification by ordination on the factorial plan 3-4", xlab="axis 3", ylab="axis 4") + par(op) + dev.off() + + + # Determine the metier level 5 from the metier level 7 based on the first species in catch (logevent by logevent) + print("Please, be patient...") + metiersFirstSpeciesL5=lapply(as.list(firstSp), function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0){print(paste(x," : unknown species, classed in \'FIF\' group",sep=""));"FIF"} + else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"DCF_species_level5_COD"]) + metiersFirstSpeciesL5=unlist(lapply(metiersFirstSpeciesL5, function(x) as.character(x))) + + + + + ################################### + # FIRST GROUP OF SPECIES IN CATCH # + ################################### + + # Determine the metier of each logevent thanks to the first group of species in catch (level 5) of the logevent + datL5=dat + groupColSpecies=lapply(as.list(colnames(datL5)), function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0){print(paste(x," 
: unknown species, classed in \'FIF\' group",sep=""));"FIF"} + else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"DCF_species_level5_COD"]) + groupColSpecies=unlist(lapply(groupColSpecies, function(x) as.character(x))) + colnames(datL5)=groupColSpecies + nbC5=length(unique(colnames(datL5))) + tab=matrix(NA,nrow=nrow(datL5),ncol=nbC5) + colnames(tab)=unique(colnames(datL5)) + for(i in 1:nbC5){ + # Sub-data containing only one metier level 5. + subdati=datL5[,which(colnames(datL5)==(unique(colnames(datL5))[i]))] + if(length(which(colnames(datL5)==(unique(colnames(datL5))[i])))==1) tab[,i]=subdati + else + tab[,i]=apply(subdati,1,sum) + } + # Metiers (level 5) of each logevent found thanks to the first group of species in catch (level 5) + metiersFirstGroupL5=unique(colnames(datL5))[apply(tab,1,which.max)] + + + + + + + ############################################################### + # LEVELS 7 AND 5 FOR METIERS FROM MULTIVARIATE CLASSIFICATION # + ############################################################### + + # List of metiers (level 5.7) + listMetiersLevel57=list() + for (i in 1:nbClust){ + metiersClusteri=lapply(targetSpecies[[i]], function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0){print(paste(x," : unknown species, classed in \'FIF\' group",sep=""));"FIF"} + else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"DCF_species_level5_COD"]) + metiersClusteri=as.character(unique(unlist(metiersClusteri))) + metiersClusteri=paste(unlist(strsplit(metiersClusteri," ")),collapse=" ") + listMetiersLevel57[[i]]=metiersClusteri + } + + # Metiers (level 5.7) of each logevent found thanks to the getMetierClusters method + #(where mixed metiers labels are the aggregation of several simple metiers labels). + metiersClustersL5=unlist(sapply(clusters,function(x) listMetiersLevel57[x])) + + + # Correspondence of mixed metiers (composed by 2 simple metiers) to their official code of mixed metiers level 5 (FAO - 3 characters). 
+ listMixedMetiersLevel57=lapply(listMetiersLevel57, function(x) if(nchar(x)!=3 & is.element(x,correspMixedMetier[,1])){correspMixedMetier[which(correspMixedMetier[,"combMetiersL5"]==x),"mixedMetierL5"]} + else x ) + + # Metiers (level 5.7) of each logevent found thanks to the getMetierClusters method (with official labels for mixed metiers). + mixedMetiersClustersL5=unlist(sapply(clusters,function(x) listMixedMetiersLevel57[x])) + + + + + + ################# + # COMPARISONS # + ################# + + # Compare the metiers 'Clusters' (from getMetierClusters) (level 7) vs the metier 'First Species' in catch (level 7) + compClustersL7vsFirstSpeciesL7=table(clusters,metiersFirstSpeciesL7) + rownames(compClustersL7vsFirstSpeciesL7)=paste("Clust",seq=1:nbClust,sep="") + + # Compare the metiers 'Clusters' (level 5.7) vs the metiers 'First Species' (level 5) + clustStep3L5=clusters + compClustersL5vsFirstSpeciesL5=table(clustStep3L5,metiersFirstSpeciesL5) + rownames(compClustersL5vsFirstSpeciesL5)=unlist(listMixedMetiersLevel57) + + # Compare the metiers 'Clusters' (level 5.7) vs the metiers 'First Group' (level 5) + #compL5ClustStep3VsGroup=table(clustersStep3L5,metiersFirstGroupL5) !! pb regroupement des clusters qui correspondent au même groupe d'espèce niveau 5. 
+ clustStep3L5=clusters + compClustersL5vsFirstGroupL5=table(clustStep3L5,metiersFirstGroupL5) + rownames(compClustersL5vsFirstGroupL5)=unlist(listMixedMetiersLevel57) + + # Compare the metiers 'First Species' (level 5) vs the metiers 'First Group' (level 5) + compFirstSpeciesL5vsFirstGroupL5=table(metiersFirstSpeciesL5,metiersFirstGroupL5) + + + # Less attractive + # Compare the metiers 'Clusters' (level 5.7) vs the metiers 'Clusters' (level 7) + clustStep3L7=clusters + compClustersL5vsClustersL7=table(mixedMetiersClustersL5,clustStep3L7) + colnames(compClustersL5vsClustersL7)=paste("Clust",seq=1:nbClust) + + + + # Create csv tables + dfTables=data.frame() + dfTables=c("compClustersL7vsFirstSpeciesL7","compClustersL5vsFirstSpeciesL5", + "compClustersL5vsFirstGroupL5","compFirstSpeciesL5vsFirstGroupL5","compClustersL5vsClustersL7") + write.table(dfTables[1],file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) + write.table(compClustersL7vsFirstSpeciesL7,file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) + write.table(dfTables[2],file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) + write.table(compClustersL5vsFirstSpeciesL5,file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) + write.table(dfTables[3],file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) + write.table(compClustersL5vsFirstGroupL5,file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) + write.table(dfTables[4],file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) + write.table(compFirstSpeciesL5vsFirstGroupL5,file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) + write.table(dfTables[5],file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) + write.table(compClustersL5vsClustersL7,file="tablesCompToOrdination.csv",append=TRUE,col.names=NA) + + print("Done.") + + return(list(nbLogFirstSp=nbLogFirstSp, + compClustersL7vsFirstSpeciesL7=compClustersL7vsFirstSpeciesL7, + compClustersL5vsFirstSpeciesL5=compClustersL5vsFirstSpeciesL5, + 
compClustersL5vsFirstGroupL5=compClustersL5vsFirstGroupL5, + compFirstSpeciesL5vsFirstGroupL5=compFirstSpeciesL5vsFirstGroupL5, + compClustersL5vsClustersL7=compClustersL5vsClustersL7)) +} diff --git a/vmstools/R/countPings.r b/vmstools/R/countPings.r index 508e22d..cb6af64 100644 --- a/vmstools/R/countPings.r +++ b/vmstools/R/countPings.r @@ -1,80 +1,118 @@ -countPings <- function(formula,tacsat,grid=NULL,by=NULL){ - - #Turn selected variabels into element list - form <- formula - if (form[[1]] != "~") - stop("Error: Formula must be one-sided.") - formc <- as.character(form[2]) - formc <- gsub(" ", "", formc) - if (!is.element(substring(formc, 1, 1), c("+", "-"))) - formc <- paste("+", formc, sep = "") - vars <- unlist(strsplit(formc, "[\\+\\-]")) - vars <- vars[vars != ""] - signs <- formc - for (i in 1:length(vars)) { - signs <- gsub(vars[i], "", signs) - } - signs <- unlist(strsplit(signs, "")) #Currently we do not use signs - - #Define which variables selected are column names, time variables or spatial variables - Vars <- vars[which(!vars %in% c("day","week","month","quarter","year","gridcell","icesrectangle","icesarea"))] - timeVars <- vars[which(vars %in% c("day","week","month","quarter","year"))] - spatVars <- vars[which(vars %in% c("gridcell","icesrectangle","icesarea"))] - - #Add time notation if you want this as output - if(length(timeVars)>0){ - if(!length(grep("SI_DATIM",colnames(tacsat)))>0) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") - if("day" %in% timeVars & !"SI_DAY" %in% colnames(tacsat)){ tacsat$SI_DAY <- an(format(tacsat$SI_DATIM,format="%j"))}; if("day" %in% timeVars){ ; timeVars[which(timeVars=="day")] <- "SI_DAY"} - if("week" %in% timeVars & !"SI_WEEK" %in% colnames(tacsat)){ tacsat$SI_WEEK <- an(format(tacsat$SI_DATIM,format="%W"))}; if("week" %in% timeVars){ ; timeVars[which(timeVars=="week")] <- "SI_WEEK" } - if("month" %in% timeVars & !"SI_MONTH" %in% 
colnames(tacsat)){ tacsat$SI_MONTH <- an(format(tacsat$SI_DATIM,format="%m"))}; if("month" %in% timeVars){ ; timeVars[which(timeVars=="month")] <- "SI_MONTH"} - if("quarter" %in% timeVars & !"SI_QUART" %in% colnames(tacsat)){ tacsat$SI_QUART <- an(substr(quarters(tacsat$SI_DATIM),2,2))}; if("quarter" %in% timeVars){ ; timeVars[which(timeVars=="quarter")] <- "SI_QUART"} - if("year" %in% timeVars & !"SI_YEAR" %in% colnames(tacsat)){ tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y"))}; if("year" %in% timeVars){ ; timeVars[which(timeVars=="year")] <- "SI_YEAR" } - } - #Add spatial notation if you want this as output - if(length(spatVars)>0){ - if("gridcell" %in% spatVars & is.null(grid) == TRUE) stop("Grid needs to be specified to use the 'gridcell' option") - if("gridcell" %in% spatVars & is.null(grid) == FALSE){ - #Create coordinates of tacsat data - coords <- cbind(x=tacsat$SI_LONG,y=tacsat$SI_LATI) - sPDF <- SpatialPointsDataFrame(coords,data=tacsat) - #Turn grid into a spatial pixel dataframe - grid <- as(grid,"SpatialPixels"); - grid <- as(grid,"SpatialPixelsDataFrame") - #Overlay the two spatial frameworks to see to which gridcell each tacsat coordinate belongs - gridCellIndex <- over(as(sPDF,"SpatialPoints"),as(grid,"SpatialPixels")) - newCoords <- sPDF@coords[gridCellIndex,] - - tacsat$GR_LONG <- newCoords[,1] - tacsat$GR_LATI <- newCoords[,2] - spatVars[which(spatVars=="gridcell")] <- "GR_LONG"; spatVars <- c(spatVars,"GR_LATI") - } - if("icesrectangle" %in% spatVars){ - if(!"LE_RECT" %in% colnames(tacsat)) - tacsat$LE_RECT <- ICESrectangle(tacsat) - spatVars[which(spatVars=="icesrectangle")] <- "LE_RECT" - } - if("icesarea" %in% spatVars){ - if(!"LE_AREA" %in% colnames(tacsat)) - tacsat$LE_AREA <- ICESarea(tacsat) - spatVars[which(spatVars=="icesarea")] <- "LE_AREA" - } - } - - if(is.null(by)){ - tacsat$SUM <- 1 - } else { - tacsat$SUM <- tacsat[,by] - } - totVars <- c(Vars,timeVars,spatVars) - - #Do the counting of pings - for(iVars in 
1:length(totVars)) tacsat[,totVars[iVars]] <- af(ac(tacsat[,totVars[iVars]])) - DT <- data.table(tacsat) - eq <- c.listquote(totVars) - - res <- DT[,sum(SUM),by=eval(eq)] - setnames(res,colnames(res),c(totVars,"pings")) - #colnames(res) <- c(totVars,"pings") - - return(data.frame(res))} - +#' Count the number of VMS pings in a selection +#' +#' Count the number of VMS pings in any selection made in time and spatial +#' frame. +#' +#' Formula has form: ~Variable+timeVariable+anotherTimeVariable+spatialVariable +#' \cr +#' +#' options in formula for Variable: any column name of tacsat file \cr options +#' in formula for timeVariable: day, week, month, quarter, year \cr options in +#' formula for spatialVariable: icesrectangle, icesarea, gridcell \cr +#' +#' @param formula specify the elements you want to use as axis of the count +#' @param tacsat tacsat data file, possibly with additional columns +#' @param grid if in formulate 'gridcell' is chosen, a SpatialGrid must be +#' provided +#' @return Returns the matrix with counted pings by each specified variable +#' @note if Tacsat is a big file, the overlay function might fail resulting in +#' terminating the function +#' @author Niels T. Hintzen +#' @seealso \code{\link{createGrid}}, \code{\link{vmsGridCreate}} +#' @references Hintzen et al. 
2010 Improved estimation of trawling tracks using +#' cubic Hermite spline interpolation of position registration data, EU lot 2 +#' project +#' @examples +#' +#' data(tacsat) +#' +#' #make the tacsat file a bit smaller +#' tacsat <- tacsat[1:10000,] +#' +#' grid <- createGrid(range(tacsat$SI_LONG,na.rm=TRUE), +#' range(tacsat$SI_LATI,na.rm=TRUE),0.5,0.5,type="SpatialGrid") +#' +#' result <- countPings(~VE_REF+year+gridcell,tacsat,grid=grid) +#' result <- countPings(~VE_REF+week+year+icesrectangle,tacsat) +#' +#' @export countPings +countPings <- function(formula,tacsat,grid=NULL,by=NULL){ + + #Turn selected variabels into element list + form <- formula + if (form[[1]] != "~") + stop("Error: Formula must be one-sided.") + formc <- as.character(form[2]) + formc <- gsub(" ", "", formc) + if (!is.element(substring(formc, 1, 1), c("+", "-"))) + formc <- paste("+", formc, sep = "") + vars <- unlist(strsplit(formc, "[\\+\\-]")) + vars <- vars[vars != ""] + signs <- formc + for (i in 1:length(vars)) { + signs <- gsub(vars[i], "", signs) + } + signs <- unlist(strsplit(signs, "")) #Currently we do not use signs + + #Define which variables selected are column names, time variables or spatial variables + Vars <- vars[which(!vars %in% c("day","week","month","quarter","year","gridcell","icesrectangle","icesarea"))] + timeVars <- vars[which(vars %in% c("day","week","month","quarter","year"))] + spatVars <- vars[which(vars %in% c("gridcell","icesrectangle","icesarea"))] + + #Add time notation if you want this as output + if(length(timeVars)>0){ + if(!length(grep("SI_DATIM",colnames(tacsat)))>0) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") + if("day" %in% timeVars & !"SI_DAY" %in% colnames(tacsat)){ tacsat$SI_DAY <- an(format(tacsat$SI_DATIM,format="%j"))}; if("day" %in% timeVars){ ; timeVars[which(timeVars=="day")] <- "SI_DAY"} + if("week" %in% timeVars & !"SI_WEEK" %in% colnames(tacsat)){ tacsat$SI_WEEK <- 
an(format(tacsat$SI_DATIM,format="%W"))}; if("week" %in% timeVars){ ; timeVars[which(timeVars=="week")] <- "SI_WEEK" } + if("month" %in% timeVars & !"SI_MONTH" %in% colnames(tacsat)){ tacsat$SI_MONTH <- an(format(tacsat$SI_DATIM,format="%m"))}; if("month" %in% timeVars){ ; timeVars[which(timeVars=="month")] <- "SI_MONTH"} + if("quarter" %in% timeVars & !"SI_QUART" %in% colnames(tacsat)){ tacsat$SI_QUART <- an(substr(quarters(tacsat$SI_DATIM),2,2))}; if("quarter" %in% timeVars){ ; timeVars[which(timeVars=="quarter")] <- "SI_QUART"} + if("year" %in% timeVars & !"SI_YEAR" %in% colnames(tacsat)){ tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y"))}; if("year" %in% timeVars){ ; timeVars[which(timeVars=="year")] <- "SI_YEAR" } + } + #Add spatial notation if you want this as output + if(length(spatVars)>0){ + if("gridcell" %in% spatVars & is.null(grid) == TRUE) stop("Grid needs to be specified to use the 'gridcell' option") + if("gridcell" %in% spatVars & is.null(grid) == FALSE){ + #Create coordinates of tacsat data + coords <- cbind(x=tacsat$SI_LONG,y=tacsat$SI_LATI) + sPDF <- SpatialPointsDataFrame(coords,data=tacsat) + #Turn grid into a spatial pixel dataframe + grid <- as(grid,"SpatialPixels"); + grid <- as(grid,"SpatialPixelsDataFrame") + #Overlay the two spatial frameworks to see to which gridcell each tacsat coordinate belongs + gridCellIndex <- over(as(sPDF,"SpatialPoints"),as(grid,"SpatialPixels")) + newCoords <- sPDF@coords[gridCellIndex,] + + tacsat$GR_LONG <- newCoords[,1] + tacsat$GR_LATI <- newCoords[,2] + spatVars[which(spatVars=="gridcell")] <- "GR_LONG"; spatVars <- c(spatVars,"GR_LATI") + } + if("icesrectangle" %in% spatVars){ + if(!"LE_RECT" %in% colnames(tacsat)) + tacsat$LE_RECT <- ICESrectangle(tacsat) + spatVars[which(spatVars=="icesrectangle")] <- "LE_RECT" + } + if("icesarea" %in% spatVars){ + if(!"LE_AREA" %in% colnames(tacsat)) + tacsat$LE_AREA <- ICESarea(tacsat) + spatVars[which(spatVars=="icesarea")] <- "LE_AREA" + } + } + + 
if(is.null(by)){ + tacsat$SUM <- 1 + } else { + tacsat$SUM <- tacsat[,by] + } + totVars <- c(Vars,timeVars,spatVars) + + #Do the counting of pings + for(iVars in 1:length(totVars)) tacsat[,totVars[iVars]] <- af(ac(tacsat[,totVars[iVars]])) + DT <- data.table(tacsat) + eq <- c.listquote(totVars) + + res <- DT[,sum(SUM),by=eval(eq)] + setnames(res,colnames(res),c(totVars,"pings")) + #colnames(res) <- c(totVars,"pings") + + return(data.frame(res))} + diff --git a/vmstools/R/createGrid.r b/vmstools/R/createGrid.r index 606e91b..5dd90c9 100644 --- a/vmstools/R/createGrid.r +++ b/vmstools/R/createGrid.r @@ -1,60 +1,98 @@ -`createGrid` <- -function(xrange - ,yrange - ,resx - ,resy - ,type="GridTopology" - ,exactBorder=FALSE - ){ - - require(sp) - if(exactBorder){ - xs <- seq(xrange[1]+resx/2,xrange[2]+resx,resx) - xborder <- range(xs) - ys <- seq(yrange[1]+resy/2,yrange[2]+resy,resy) - yborder <- range(ys) - } else { - roundDigitx <- getndp(resx) - roundDigity <- getndp(resy) - xs <- seq(xrange[1],xrange[2] +resx,resx) - xborder <- c((min(floor(xs*10^roundDigitx))/10^roundDigitx)- resx/2,(max(ceiling(xs*10^roundDigitx)) / 10^roundDigitx)+resx/2) - ys <- seq(yrange[1],yrange[2] +resy,resy) - yborder <- c((min(floor(ys*10^roundDigity))/10^roundDigity)- resy/2,(max(ceiling(ys*10^roundDigity)) / 10^roundDigity)+resy/2) - } - - grid <- GridTopology(c(xborder[1],yborder[1]),c(resx,resy),c(length(seq(xborder[1],xborder[2],resx)),length(seq(yborder[1],yborder[2],resy)))) - if(type=="SpatialGrid"){ - grid <- SpatialGrid(grid=grid); - } - if(type=="SpatialPixels"){ - grid <- GridTopology(c(xborder[1]-resx/2,yborder[1]-resy/2),c(resx,resy),c(length(seq(xborder[1]-resx/2,xborder[2]+resx/2,resx)),length(seq(yborder[1]-resy/2,yborder[2]+resy/2,resy)))) - grid <- SpatialGrid(grid=grid); - gridded(grid) = TRUE - grid <- as(grid,"SpatialPixels"); - } - if(type=="SpatialPixelsDataFrame"){ - grid <- 
GridTopology(c(xborder[1]-resx/2,yborder[1]-resy/2),c(resx,resy),c(length(seq(xborder[1]-resx/2,xborder[2]+resx/2,resx)),length(seq(yborder[1]-resy/2,yborder[2]+resy/2,resy)))) - grid <- SpatialGrid(grid=grid); - gridded(grid) = TRUE - grid <- as(grid,"SpatialPixels"); - sPDF <- as(grid,"SpatialPixelsDataFrame") - sPDF@data <- data.frame(rep(0,nrow(coordinates(sPDF)))) - colnames(sPDF@data) <- "data" - grid <- sPDF - } - if(type=="SpatialGridDataFrame"){ - grid <- SpatialGrid(grid=grid); - sPDF <- as(grid,"SpatialGridDataFrame") - sPDF@data <- data.frame(rep(0,nrow(coordinates(sPDF)))) - colnames(sPDF@data) <- "data" - grid <- sPDF - } - - if(!class(grid)=="GridTopology"){ - bb <- bbox(grid) - if(bb[1,1] > min(xrange) | max(xrange) > bb[1,2] | bb[2,1] > min(yrange) | max(yrange) > bb[2,2]) - stop("Dimensions of grid not large enough to span xranges and yranges (bug)") - } - - return(grid)} - +#' create a Grid (GridTopology: sp) depending on x and y limits and gridcell +#' size +#' +#' Function creates a grid with rounded (no decimal) begin and end longitude +#' and latitude based on the ranges given. It creates this grid with gridcell +#' size specified. +#' +#' The grid is created depending on the 'sp' package. The grid defined has +#' default class 'GridTopology'. +#' +#' @param xrange specify minimum and maximum longitude / x values which should +#' at least be in the grid +#' @param yrange specify minimum and maximum latitude / y values which should +#' at least be in the grid +#' @param resx gridcell size in degrees in the longitude / x direction +#' @param resy gridcell size in degrees in the latitude / y direction +#' @param type specify output class: +#' 'GridTopology','SpatialGrid','SpatialPixels', +#' 'SpatialPixelsDataFrame','SpatialGridDataFrame' +#' @param exactBorder Logical: if the specified resx and resy (lower-left +#' values) should be taken as exact +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{vmsGridCreate}} +#' @references EU Lot 2 project +#' @examples +#' +#' x <- seq(-4.999,5,length.out=10) +#' y <- seq(50.002,55,length.out=10) +#' xrange <- range(x,na.rm=TRUE) +#' yrange <- range(y,na.rm=TRUE) +#' +#' #returns a grid with 1001 cells in the x-direction and 101 in the y-direction +#' Grid <- createGrid(xrange,yrange,0.01,0.05,type="SpatialGrid") +#' bbox(Grid) +#' Grid <- createGrid(xrange,yrange,0.01,0.05,type="SpatialGrid",exactBorder=TRUE) +#' bbox(Grid) +#' +#' @export createGrid +`createGrid` <- +function(xrange + ,yrange + ,resx + ,resy + ,type="GridTopology" + ,exactBorder=FALSE + ){ + + require(sp) + if(exactBorder){ + xs <- seq(xrange[1]+resx/2,xrange[2]+resx,resx) + xborder <- range(xs) + ys <- seq(yrange[1]+resy/2,yrange[2]+resy,resy) + yborder <- range(ys) + } else { + roundDigitx <- getndp(resx) + roundDigity <- getndp(resy) + xs <- seq(xrange[1],xrange[2] +resx,resx) + xborder <- c((min(floor(xs*10^roundDigitx))/10^roundDigitx)- resx/2,(max(ceiling(xs*10^roundDigitx)) / 10^roundDigitx)+resx/2) + ys <- seq(yrange[1],yrange[2] +resy,resy) + yborder <- c((min(floor(ys*10^roundDigity))/10^roundDigity)- resy/2,(max(ceiling(ys*10^roundDigity)) / 10^roundDigity)+resy/2) + } + + grid <- GridTopology(c(xborder[1],yborder[1]),c(resx,resy),c(length(seq(xborder[1],xborder[2],resx)),length(seq(yborder[1],yborder[2],resy)))) + if(type=="SpatialGrid"){ + grid <- SpatialGrid(grid=grid); + } + if(type=="SpatialPixels"){ + grid <- GridTopology(c(xborder[1]-resx/2,yborder[1]-resy/2),c(resx,resy),c(length(seq(xborder[1]-resx/2,xborder[2]+resx/2,resx)),length(seq(yborder[1]-resy/2,yborder[2]+resy/2,resy)))) + grid <- SpatialGrid(grid=grid); + gridded(grid) = TRUE + grid <- as(grid,"SpatialPixels"); + } + if(type=="SpatialPixelsDataFrame"){ + grid <- GridTopology(c(xborder[1]-resx/2,yborder[1]-resy/2),c(resx,resy),c(length(seq(xborder[1]-resx/2,xborder[2]+resx/2,resx)),length(seq(yborder[1]-resy/2,yborder[2]+resy/2,resy)))) + 
grid <- SpatialGrid(grid=grid); +    gridded(grid) = TRUE +    grid          <- as(grid,"SpatialPixels"); +    sPDF          <- as(grid,"SpatialPixelsDataFrame") +    sPDF@data     <- data.frame(rep(0,nrow(coordinates(sPDF)))) +    colnames(sPDF@data) <- "data" +    grid          <- sPDF +  } +  if(type=="SpatialGridDataFrame"){ +    grid          <- SpatialGrid(grid=grid); +    sPDF          <- as(grid,"SpatialGridDataFrame") +    sPDF@data     <- data.frame(rep(0,nrow(coordinates(sPDF)))) +    colnames(sPDF@data) <- "data" +    grid          <- sPDF +  } + +  if(!class(grid)=="GridTopology"){ +    bb  <- bbox(grid) +    if(bb[1,1] > min(xrange) | max(xrange) > bb[1,2] | bb[2,1] > min(yrange) | max(yrange) > bb[2,2]) +      stop("Dimensions of grid not large enough to span xranges and yranges (bug)") +  } + +  return(grid)} + diff --git a/vmstools/R/degree2Km.R b/vmstools/R/degree2Km.R index 26992e2..3e83959 100644 --- a/vmstools/R/degree2Km.R +++ b/vmstools/R/degree2Km.R @@ -1,12 +1,35 @@ -`degree2Km` <- -function(lon,lat,degree){ -                    x1 <- lon -                    y1 <- lat - -                    a <- cos(y1*pi/180)*cos(y1*pi/180)*sin((1*pi/180)/2)*sin((1*pi/180)/2); -                    c <- 2*atan2(sqrt(a),sqrt(1-a)); -                    R <- 6371; -                    dx1 <- R*c - -                  return(dx1 * degree)} - +#' Compute distance from degrees into kilometers +#' +#' Function transforms the distance expressed in degrees into kilometers. This is +#' based on the GPS location of a point. +#' +#' +#' @param lon Longitude of the GPS position +#' @param lat Latitude of the GPS position +#' @param degree Value in degrees to turn into Km +#' @note Computation of Km is approximation based on the Haversine formula +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{distance}}, \code{\link{km2Degree}}, +#' \code{\link{lonLatRatio}} +#' @references EU lot 2 project +#' @examples +#' +#' lon <- -4 +#' lat <- 50 +#' degree <- 1.601833 +#' +#' degree2Km(lon,lat,degree) #114.4897km +#' +#' @export degree2Km +`degree2Km` <- +function(lon,lat,degree){ +                    x1 <- lon +                    y1 <- lat + +                    a <- cos(y1*pi/180)*cos(y1*pi/180)*sin((1*pi/180)/2)*sin((1*pi/180)/2); +                    c <- 2*atan2(sqrt(a),sqrt(1-a)); +                    R <- 6371; +                    dx1 <- R*c + +                  return(dx1 * degree)} + diff --git a/vmstools/R/destFromBearing.r b/vmstools/R/destFromBearing.r index 6218a37..f19b745 100644 --- a/vmstools/R/destFromBearing.r +++ b/vmstools/R/destFromBearing.r @@ -1,15 +1,36 @@ -destFromBearing <- function(lon,lat,bearing,distance){ - -  pd    <- pi/180 -  dist  <- (distance/6371) -  y1    <- lat * pd -  x1    <- lon * pd -  bear  <- bearing * pd - -  y2  <- asin(sin(y1) * cos(dist) + cos(y1) * sin(dist) * cos(bear)) -  x2  <- x1 + atan2(sin(bear) * sin(dist) * cos(y1),cos(dist) - sin(y1) * sin(y2)) - -  x2  <- (x2 + 3*pi) %% (2*pi) - pi -  y2  <- y2 / pd -  x2  <- x2 / pd -  return(cbind(x2,y2))} \ No newline at end of file +#' Find destination from point of origin given bearing and distance +#' +#' Find destination from point of origin given bearing and distance +#' +#' +#' @param lon Longitude of origin +#' @param lat Latitude of origin +#' @param bearing Bearing to destination +#' @param distance Distance to cover to destination in km +#' @return Returns the destination point(s) as matrix +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{addWidth}} +#' @references EU Lot 2 project +#' @examples +#' +#' res <- destFromBearing(rep(2.5,10),rep(51.5,10), +#' runif(10,0,360),runif(10,0,0.1)) +#' plot(res[,1],res[,2]) +#' points(2.5,51.5,cex=1.1,pch=19) +#' +#' @export destFromBearing +destFromBearing <- function(lon,lat,bearing,distance){ + + pd <- pi/180 + dist <- (distance/6371) + y1 <- lat * pd + x1 <- lon * pd + bear <- bearing * pd + + y2 <- asin(sin(y1) * cos(dist) + cos(y1) * sin(dist) * cos(bear)) + x2 <- x1 + atan2(sin(bear) * sin(dist) * cos(y1),cos(dist) - sin(y1) * sin(y2)) + + x2 <- (x2 + 3*pi) %% (2*pi) - pi + y2 <- y2 / pd + x2 <- x2 / pd + return(cbind(x2,y2))} diff --git a/vmstools/R/diffInter.R b/vmstools/R/diffInter.R index 4fa7b8f..a1dfabc 100644 --- a/vmstools/R/diffInter.R +++ b/vmstools/R/diffInter.R @@ -1,43 +1,99 @@ -diffInter <- function(interpolation - ,reference){ - - - #Get the starting and ending positions of the interpolations - interIdx <- matrix(unlist(lapply(interpolation,function(x){return(x[1,])})),ncol=2,nrow=length(interpolation),dimnames=list(interpolation=1:length(interpolation),c("x","y")),byrow=TRUE) - #Store the deviations from the interpolation and reference dataset - storeDiffs <- matrix(NA,nrow=length(interpolation),ncol=5,dimnames=list(1:length(interpolation),c("mean","logmean","sd","logsd","sum"))) - - #Loop over all the interpolations - for(i in 1:length(interpolation)){ - int <- interpolation[[i]] - ref <- reference[seq(interIdx[i,1],interIdx[i,2],1),] - - #Calculate the difference between each datapoint - distInt <- distance(int[3:dim(int)[1],1], int[3:dim(int)[1],2], int[2:(dim(int)[1]-1),1], int[2:(dim(int)[1]-1),2]) - distRef <- distance(ref$SI_LONG[2:dim(ref)[1]], ref$SI_LATI[2:dim(ref)[1]], ref$SI_LONG[1:(dim(ref)[1]-1)], ref$SI_LATI[1:(dim(ref)[1]-1)]) - #To calculate the total distance travelled, sum all individual distances - cumsumInt <- cumsum(distInt) - cumsumRef <- cumsum(distRef) - #To select the points on 
the interpolated track to match up with the reference points we do the following - #- First rescale the distance travelled within the reference set to equal the total distance in the interpolated set - #- Than substract the distance travelled in the reference set for each point from the distance travelled in the interpolated set, between all points - #- Search for the point in the interpolated set that comes closest to that distance travelled - # By doing this, we assume that an equal distance of the total in the reference set and in the interpolated set is travelled - # This enables the user too to make use of irregular reference set polling rates, as matching points are found based on distance travelled - # As well, if vessels speed between two points and slow down between other, this does match up better with the interpolated set - matchRefDist <- c(cumsumRef / (rev(cumsumRef)[1]/rev(cumsumInt)[1])) - matchRef <- c(1,apply(abs(outer(matchRefDist,cumsumInt,"-")),1,which.min)+1) - - matchPx <- int[matchRef+1,1] - matchPy <- int[matchRef+1,2] - - #Calculate the distance between the reference points and the points on the interpolated track that are matched - res <- distance(matchPx,matchPy,ref$SI_LONG,ref$SI_LATI) - #Store the differences for each interpolation - storeDiffs[i,]<- c( mean(res[-c(1,length(res))],na.rm=TRUE), - exp(mean(log(res[-c(1,length(res))])[which(is.finite(log(res[-c(1,length(res))]))==TRUE)],na.rm=TRUE)), - sd(res[-c(1,length(res))],na.rm=TRUE), - exp(sd(log(res[-c(1,length(res))])[which(is.finite(log(res[-c(1,length(res))]))==TRUE)],na.rm=TRUE)), - sum(res[-c(1,length(res))],na.rm=TRUE)) - } -return(storeDiffs)} +#' Calculate distance between interpolation and reference set +#' +#' Calculate for each interpolation the distance at fixed points, depending on +#' the number of points present in the reference set, between the interpolated +#' trajectory and the reference trajectory. 
This indicates the deviation of the +#' interpolated set to the reference set. +#' +#' Each interpolation has a start and end point which are similar to the start +#' and end point of the reference set. In between these two points, the +#' reference set can have more in-between points which are not used for +#' interpolation, depending on the interval chosen in the 'interpolateVMS' +#' function. These points are matched up with the points on the interpolated +#' track. For each of these reference in-between points, it is calculated which +#' fraction of the total distance was travelled. The same fraction is applied +#' to the interpolated track, and the point that matches with this distance is +#' linked to the reference set point. The distance function is used to +#' calculate the distance between the reference and interpolated dataset. +#' +#' The mean, logmean, standard deviation, sdlog and total sum of these +#' distances is returned as a matrix. +#' +#' @param interpolation Interpolated dataset as output from the function +#' 'interpolateVMS' +#' @param reference Original higher resolution VMS dataset +#' @author Niels T. Hintzen +#' @seealso \code{\link{distance}}, \code{\link{distanceInterpolation}}, +#' \code{\link{distanceVMS}} +#' @references Hintzen et al. 
2010 Improved estimation of trawling tracks using +#' cubic Hermite spline interpolation of position registration data, EU lot 2 +#' project +#' @examples +#' +#' \dontrun{ +#' data(VMShf) +#' +#' #-Put the data in the right format +#' VMShf$SI_DATE <- format(as.Date(VMShf$date),"%d/%m/%Y") +#' VMShf$SI_TIME <- format(VMShf$date, "%H:%M") +#' +#' colnames(VMShf) <- c("VE_REF","SI_LATI","SI_LONG","SI_SP", +#' "SI_HE","SI_DATIM","SI_DATE","SI_TIME") +#' +#' #-Sort the data and remove non-fishing pings +#' VMShf <- sortTacsat(VMShf) +#' VMShf <- filterTacsat(VMShf,c(2,6),NULL,T) +#' +#' interpolation <- interpolateTacsat(VMShf,interval=120,margin=10,res=100, +#' method="cHs",params=list(fm=0.3,distscale=20, +#' sigline=0.2,st=c(2,6)),headingAdjustment=0) +#' +#' #Returns a matrix with 21 rows (each row represents 1 interpolation) +#' # and 5 measures +#' cHs <- diffInter(interpolation,VMShf) +#' } +#' +#' @export diffInter +diffInter <- function(interpolation + ,reference){ + + + #Get the starting and ending positions of the interpolations + interIdx <- matrix(unlist(lapply(interpolation,function(x){return(x[1,])})),ncol=2,nrow=length(interpolation),dimnames=list(interpolation=1:length(interpolation),c("x","y")),byrow=TRUE) + #Store the deviations from the interpolation and reference dataset + storeDiffs <- matrix(NA,nrow=length(interpolation),ncol=5,dimnames=list(1:length(interpolation),c("mean","logmean","sd","logsd","sum"))) + + #Loop over all the interpolations + for(i in 1:length(interpolation)){ + int <- interpolation[[i]] + ref <- reference[seq(interIdx[i,1],interIdx[i,2],1),] + + #Calculate the difference between each datapoint + distInt <- distance(int[3:dim(int)[1],1], int[3:dim(int)[1],2], int[2:(dim(int)[1]-1),1], int[2:(dim(int)[1]-1),2]) + distRef <- distance(ref$SI_LONG[2:dim(ref)[1]], ref$SI_LATI[2:dim(ref)[1]], ref$SI_LONG[1:(dim(ref)[1]-1)], ref$SI_LATI[1:(dim(ref)[1]-1)]) + #To calculate the total distance travelled, sum all individual distances + 
cumsumInt <- cumsum(distInt) + cumsumRef <- cumsum(distRef) + #To select the points on the interpolated track to match up with the reference points we do the following + #- First rescale the distance travelled within the reference set to equal the total distance in the interpolated set + #- Than substract the distance travelled in the reference set for each point from the distance travelled in the interpolated set, between all points + #- Search for the point in the interpolated set that comes closest to that distance travelled + # By doing this, we assume that an equal distance of the total in the reference set and in the interpolated set is travelled + # This enables the user too to make use of irregular reference set polling rates, as matching points are found based on distance travelled + # As well, if vessels speed between two points and slow down between other, this does match up better with the interpolated set + matchRefDist <- c(cumsumRef / (rev(cumsumRef)[1]/rev(cumsumInt)[1])) + matchRef <- c(1,apply(abs(outer(matchRefDist,cumsumInt,"-")),1,which.min)+1) + + matchPx <- int[matchRef+1,1] + matchPy <- int[matchRef+1,2] + + #Calculate the distance between the reference points and the points on the interpolated track that are matched + res <- distance(matchPx,matchPy,ref$SI_LONG,ref$SI_LATI) + #Store the differences for each interpolation + storeDiffs[i,]<- c( mean(res[-c(1,length(res))],na.rm=TRUE), + exp(mean(log(res[-c(1,length(res))])[which(is.finite(log(res[-c(1,length(res))]))==TRUE)],na.rm=TRUE)), + sd(res[-c(1,length(res))],na.rm=TRUE), + exp(sd(log(res[-c(1,length(res))])[which(is.finite(log(res[-c(1,length(res))]))==TRUE)],na.rm=TRUE)), + sum(res[-c(1,length(res))],na.rm=TRUE)) + } +return(storeDiffs)} diff --git a/vmstools/R/distance.R b/vmstools/R/distance.R index f6c76ab..ce7d2e1 100644 --- a/vmstools/R/distance.R +++ b/vmstools/R/distance.R @@ -1,13 +1,39 @@ -`distance` <- -function(lon,lat,lonRef,latRef){ - - pd <- pi/180 - - a1<- 
sin(((latRef-lat)*pd)/2) - a2<- cos(lat*pd) - a3<- cos(latRef*pd) - a4<- sin(((lonRef-lon)*pd)/2) - a <- a1*a1+a2*a3*a4*a4 - - c <- 2*atan2(sqrt(a),sqrt(1-a)); - return(6371*c)} \ No newline at end of file +#' Compute distance between two points on a sphere (approximation of the earth) +#' +#' Compute the distance between two GPS locations defined in longitude and +#' latitude notation on the earth. The earth is assumed to have a perfect +#' spherical shape. Distance is returned in km. +#' +#' +#' @param lon Longitude of point 2 +#' @param lat Latitude of point 2 +#' @param lonRef Longitude of point 1 +#' @param latRef Latitude of point 1 +#' @author Niels T. Hintzen +#' @seealso \code{\link{km2Degree}}, \code{\link{degree2Km}}, +#' \code{\link{lonLatRatio}} +#' @references EU Lot 2 project, based on the Haversine formula, see also: +#' Hintzen et al. 2010 Fisheries Research +#' @examples +#' +#' lon <- -4 +#' lat <- 50 +#' lonRef <- -4.2 +#' latRef <- 51 +#' +#' distance(lon,lat,lonRef,latRef) #112.09 +#' +#' @export distance +`distance` <- +function(lon,lat,lonRef,latRef){ + + pd <- pi/180 + + a1<- sin(((latRef-lat)*pd)/2) + a2<- cos(lat*pd) + a3<- cos(latRef*pd) + a4<- sin(((lonRef-lon)*pd)/2) + a <- a1*a1+a2*a3*a4*a4 + + c <- 2*atan2(sqrt(a),sqrt(1-a)); + return(6371*c)} diff --git a/vmstools/R/distanceInterpolation.R b/vmstools/R/distanceInterpolation.R index 190c16c..0dd0644 100644 --- a/vmstools/R/distanceInterpolation.R +++ b/vmstools/R/distanceInterpolation.R @@ -1,14 +1,54 @@ -distanceInterpolation <- function(interpolation){ - - res <- unlist(lapply(interpolation,function(x){ - dims <- dim(x) - res <- distance(x[3:dims[1],1],x[3:dims[1],2],x[2:(dims[1]-1),1],x[2:(dims[1]-1),2]) - return(sum(res,na.rm=TRUE))})) - - return(res)} - - - - - - \ No newline at end of file +#' Calculate the distance (in km) from an interpolated dataset +#' +#' For each interpolation between two succeeding datapoints, the distance +#' travelled is computed based on the function 
'distance()'. This function is +#' an easy to use wrapper for a whole interpolated dataset +#' +#' On default, each interpolation consists of 100 points. Hence, 99 distances +#' are computed for each interpolation. Returned are 5 +#' +#' @param interpolation Interpolated dataset as output from the function +#' 'interpolateVMS' +#' @author Niels T. Hintzen +#' @seealso \code{\link{distance}}, \code{\link{distanceTacsat}} +#' @references EU lot 2 project +#' @examples +#' +#' data(tacsat) +#' #Speed threshold points (two values), NULL means use all points +#' st <- c(2,6) +#' #Remove duplicate records in VMS dataset +#' remDup <- TRUE +#' +#' #Sort the tacsat data +#' tacsat <- sortTacsat(tacsat) +#' tacsat <- tacsat[1:1000,] +#' +#' #Filter the tacsat data +#' tacsat <- filterTacsat(tacsat,st,NULL,remDup) +#' +#' interpolation <- interpolateTacsat(tacsat,interval=120,margin=10, +#' res=100,method="cHs",params=list(fm=0.3,distscale=20, +#' sigline=0.2,st=c(2,6)),headingAdjustment=0) +#' #Number of interpolations: +#' length(interpolation) +#' #Calculate distance: +#' #Returns 159 values, which represents the distance travelled in +#' #each of the 159 interpolations +#' distanceInterpolation(interpolation) +#' +#' @export distanceInterpolation +distanceInterpolation <- function(interpolation){ + + res <- unlist(lapply(interpolation,function(x){ + dims <- dim(x) + res <- distance(x[3:dims[1],1],x[3:dims[1],2],x[2:(dims[1]-1),1],x[2:(dims[1]-1),2]) + return(sum(res,na.rm=TRUE))})) + + return(res)} + + + + + + diff --git a/vmstools/R/distanceTacsat.r b/vmstools/R/distanceTacsat.r index 3c6871e..de0f4f1 100644 --- a/vmstools/R/distanceTacsat.r +++ b/vmstools/R/distanceTacsat.r @@ -1,11 +1,47 @@ -distanceTacsat <- function(tacsat,index){ - - res <- unlist(lapply(as.list(1:dim(index)[1]),function(x){ - iS <- index[x,1] - iE <- index[x,2] - iL <- iE-iS+1 - res <- distance(tacsat[iS:iE,]$SI_LONG[2:iL],tacsat[iS:iE,]$SI_LATI[2:iL], - 
tacsat[iS:iE,]$SI_LONG[1:(iL-1)],tacsat[iS:iE,]$SI_LATI[1:(iL-1)]) - return(sum(res,na.rm=TRUE))})) - - return(res)} \ No newline at end of file +#' Calculate the distance (in km) from a Tacsat dataset +#' +#' Calculates the distance of a Tacsat dataset with specification of succeeding +#' datapoints. Distance is only calculated between these specified datapoints +#' +#' index is designed as a matrix where rows represent succeeding datapoints, +#' column 1 represent start points, column 2 represent end points. +#' +#' @param tacsat tacsat (normal or high ping rate) dataset +#' @param index Matrix with specification of succeeding datapoints (see details +#' for format) +#' @author Niels T. Hintzen +#' @seealso \code{\link{distance}}, \code{\link{distanceInterpolation}}, +#' \code{\link{diffInter}} +#' @references EU lot 2 project +#' @examples +#' +#' +#' data(tacsat) +#' #Speed threshold points (two values), NULL means use all points +#' st <- c(2,6) +#' #Remove duplicate records in VMS dataset +#' remDup <- TRUE +#' +#' #Sort the VMS data +#' tacsat <- sortTacsat(tacsat) +#' tacsat <- tacsat[1:1000,] +#' +#' #Filter the VMS data +#' tacsat <- filterTacsat(tacsat,st,NULL,remDup) +#' +#' distanceTacsat(tacsat,matrix(c(2,3,3,4),nrow=2,ncol=2, +#' dimnames=list(1:2,c("startpoint","endpoint")))) +#' #6.335944 14.847291 +#' +#' @export distanceTacsat +distanceTacsat <- function(tacsat,index){ + + res <- unlist(lapply(as.list(1:dim(index)[1]),function(x){ + iS <- index[x,1] + iE <- index[x,2] + iL <- iE-iS+1 + res <- distance(tacsat[iS:iE,]$SI_LONG[2:iL],tacsat[iS:iE,]$SI_LATI[2:iL], + tacsat[iS:iE,]$SI_LONG[1:(iL-1)],tacsat[iS:iE,]$SI_LATI[1:(iL-1)]) + return(sum(res,na.rm=TRUE))})) + + return(res)} diff --git a/vmstools/R/effort.r b/vmstools/R/effort.r index 98d1ddd..fe5e6be 100644 --- a/vmstools/R/effort.r +++ b/vmstools/R/effort.r @@ -1,3 +1,48 @@ +#' Calculate effort of tacsat or eflalo dataset +#' +#' Calculate effort (in hours, days, ...) 
based on tacsat or eflalo dataset for +#' different combination of settings +#' +#' if 'byRow' is selected, no other elements can be added +#' +#' @param x Either eflalo or tacsat format data +#' @param by Vector including the elements to calculate effort by. Options are: +#' byRow, VE_REF, FT_REF, LE_GEAR, SI_DAY, SI_WEEK, SI_MONTH, SI_QUARTER, +#' SI_YEAR, LE_RECT, LE_AREA +#' @param unit Unit must be in 'secs,mins,hours,days or weeks' +#' @param weight Only relevant for tacsat: weight to apply to calculation of +#' mean interval rate towards and away from ping +#' @param fill.na Only relevant for tacsat: If interval rate cannot be +#' calculated based on default or provided weight, take closest alternative to +#' provide an interval rate +#' @author Niels T. Hintzen +#' @seealso \code{\link{raiseTacsat}} +#' @examples +#' +#' data(eflalo) +#' data(tacsat) +#' #-Remove duplicated records from tacsat +#' myf <- paste(tacsat$VE_REF,tacsat$SI_LATI,tacsat$SI_LONG, +#' tacsat$SI_DATE,tacsat$SI_TIME); +#' tacsat <- tacsat[!duplicated(myf),]; +#' +#' #- Try some out for eflalo +#' a1 <- effort(eflalo,by=c("FT_REF"),unit="hours",weight=c(0.5,0.5),fill.na=TRUE); sum(a1$EFFORT) +#' a2 <- effort(eflalo,by=c("SI_MONTH"),unit="hours",weight=c(0.5,0.5),fill.na=TRUE); sum(a2$EFFORT) +#' a3 <- effort(eflalo,by=c("VE_REF","SI_MONTH"),unit="hours",weight=c(0.5,0.5),fill.na=TRUE); sum(a3$EFFORT) +#' a4 <- effort(eflalo,by=c("VE_REF","SI_QUARTER","LE_GEAR"),unit="hours",weight=c(0.5,0.5),fill.na=TRUE); sum(a4$EFFORT) +#' a5 <- effort(eflalo,by=c("byRow"),unit="hours",weight=c(0.5,0.5),fill.na=TRUE); sum(a5$EFFORT) +#' a6 <- effort(eflalo,by=c("SI_DAY","LE_GEAR"),unit="hours",weight=c(0.5,0.5),fill.na=TRUE); sum(a6$EFFORT) +#' +#' #- Try some out for tacsat +#' tacsatp <- mergeEflalo2Tacsat(eflalo,tacsat) +#' tacsatp$LE_GEAR <- eflalo$LE_GEAR[match(tacsatp$FT_REF,eflalo$FT_REF)] +#' b1 <- effort(tacsatp,by=c("FT_REF"),unit="hours",weight=c(0.5,0.5),fill.na=TRUE); sum(b1$EFFORT) +#' 
b2 <- effort(tacsatp,by=c("SI_MONTH"),unit="hours",weight=c(0.5,0.5),fill.na=TRUE); sum(b2$EFFORT) +#' b3 <- effort(tacsatp,by=c("VE_REF","SI_MONTH"),unit="hours",weight=c(0.5,0.5),fill.na=TRUE); sum(b3$EFFORT) +#' b4 <- effort(tacsatp,by=c("VE_REF","SI_QUARTER","LE_GEAR"),unit="hours",weight=c(0.5,0.5),fill.na=TRUE); sum(b4$EFFORT) +#' b5 <- effort(tacsatp,by=c("byRow"),unit="hours",weight=c(0.5,0.5),fill.na=TRUE); sum(b5$EFFORT,na.rm=T) +#' @export effort effort <- function(x,by="FT_REF",unit="hours",weight=c(0.5,0.5),fill.na=FALSE){ dattype <- ifelse(all(c("SI_LATI","SI_LONG") %in% colnames(x)),"tacsat","eflalo") diff --git a/vmstools/R/eflalo2Pings.r b/vmstools/R/eflalo2Pings.r index a2a510a..41d2a16 100644 --- a/vmstools/R/eflalo2Pings.r +++ b/vmstools/R/eflalo2Pings.r @@ -1,32 +1,52 @@ - eflalo2Pings <- function(eflalo,tacsat,pings,vars,eflaloCol,remainTacsat,by=NULL){ - #- Merge landings and values to get unique eflalo set given 'totVars' - for(iVars in 1:length(vars)){ - eflalo[,vars[iVars]] <- af(ac(eflalo[,vars[iVars]])) - tacsat[,vars[iVars]] <- af(ac(tacsat[,vars[iVars]])) - } - - DT <- data.table(eflalo) - eq1 <- c.listquote(paste("sum(",colnames(eflalo[,kgeur(colnames(eflalo))]),",na.rm=TRUE)",sep="")) - eq2 <- c.listquote(vars) - - eflalo <- data.frame(DT[,eval(eq1),by=eval(eq2)]); colnames(eflalo) <- c(vars,eflaloCol) - eflalo$ID <- 1:nrow(eflalo) - - #- Merge eflalo to pings to get number of pings per eflalo record - byPing <- merge(eflalo,data.frame(pings),by=vars,all=FALSE) - byTacsat <- merge(tacsat,byPing,by=vars,all=FALSE) - if(is.null(by)==FALSE) - byTacsat$pings <- byTacsat$pings / byTacsat[,by] - - try(print(paste("kg in eflalo",round(sum(byPing [,kgeur(colnames(byPing))]))))) - try(print(paste("kg in merged tacsat",round(sum(sweep(byTacsat[,kgeur(colnames(byTacsat))],1,byTacsat$pings,"/")))))) - - #- Bookkeeping which tacsat ID's have been merged and which have not yet been merged - remainTacsat <- remainTacsat[which(!remainTacsat %in% 
byTacsat$ID.x)] - - #- Bookkeeping which eflalo catches have been merged and which have not yet been merged - idx <- sort(unique(byPing$ID)) - try(print(paste("kg removed from eflalo",round(sum(eflalo[idx,kgeur(colnames(eflalo))]))))) - eflalo[idx,kgeur(colnames(eflalo))] <- 0 - - return(list(eflalo=eflalo,tacsat=byTacsat,remainTacsat=remainTacsat))} \ No newline at end of file +#' Merge eflalo to tacsat pings +#' +#' Internal function of splitAmongPings to merge eflalo landings or values to +#' tacsat pings +#' +#' +#' @param eflalo eflalo dataset +#' @param tacsat tacsat dataset +#' @param pings number of pings by variable +#' @param vars variable to merge eflalo to tacsat +#' @param eflaloCol column names of eflalo +#' @param remainTacsat number of tacsat pings that have not been merged yet +#' @return Returns a list of the eflalo dataset, but without the landings and +#' values that have been merged, returns the merged tacsat dataset and returns +#' the number tacsat pings that have not been merged yet +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{splitAmongPings}}, \code{\link{mergeEflalo2Tacsat}}, +#' \code{\link{mergeEflalo2Pings}} +#' @references EU Lot 2 project +#' @export eflalo2Pings + eflalo2Pings <- function(eflalo,tacsat,pings,vars,eflaloCol,remainTacsat,by=NULL){ + #- Merge landings and values to get unique eflalo set given 'totVars' + for(iVars in 1:length(vars)){ + eflalo[,vars[iVars]] <- af(ac(eflalo[,vars[iVars]])) + tacsat[,vars[iVars]] <- af(ac(tacsat[,vars[iVars]])) + } + + DT <- data.table(eflalo) + eq1 <- c.listquote(paste("sum(",colnames(eflalo[,kgeur(colnames(eflalo))]),",na.rm=TRUE)",sep="")) + eq2 <- c.listquote(vars) + + eflalo <- data.frame(DT[,eval(eq1),by=eval(eq2)]); colnames(eflalo) <- c(vars,eflaloCol) + eflalo$ID <- 1:nrow(eflalo) + + #- Merge eflalo to pings to get number of pings per eflalo record + byPing <- merge(eflalo,data.frame(pings),by=vars,all=FALSE) + byTacsat <- merge(tacsat,byPing,by=vars,all=FALSE) + if(is.null(by)==FALSE) + byTacsat$pings <- byTacsat$pings / byTacsat[,by] + + try(print(paste("kg in eflalo",round(sum(byPing [,kgeur(colnames(byPing))]))))) + try(print(paste("kg in merged tacsat",round(sum(sweep(byTacsat[,kgeur(colnames(byTacsat))],1,byTacsat$pings,"/")))))) + + #- Bookkeeping which tacsat ID's have been merged and which have not yet been merged + remainTacsat <- remainTacsat[which(!remainTacsat %in% byTacsat$ID.x)] + + #- Bookkeeping which eflalo catches have been merged and which have not yet been merged + idx <- sort(unique(byPing$ID)) + try(print(paste("kg removed from eflalo",round(sum(eflalo[idx,kgeur(colnames(eflalo))]))))) + eflalo[idx,kgeur(colnames(eflalo))] <- 0 + + return(list(eflalo=eflalo,tacsat=byTacsat,remainTacsat=remainTacsat))} diff --git a/vmstools/R/eflalo2relational.r b/vmstools/R/eflalo2relational.r index 1a8eb11..d108901 100644 --- a/vmstools/R/eflalo2relational.r +++ b/vmstools/R/eflalo2relational.r @@ -1,28 +1,46 @@ -eflalo2relational <- function(x){ - idxkg <- grep("KG",colnames(x)) - 
idxeuro <- grep("EURO",colnames(x)) - selectColsKG <- apply(x[,idxkg], 1,FUN=function(y){idx <- which(y >0); - return(y[idx])}) - selectColsEURO <- apply(x[,idxeuro],1,FUN=function(y){idx <- which(y >0); - return(y[idx])}) - - - idxNonkgeur <- which(!1:length(colnames(x)) %in% kgeur(colnames(x))) - xRelationalList <- lapply(as.list(1:length(selectColsKG)),function(y){ - newX <- data.frame(x[y,idxNonkgeur]) - newX$LE_SP <- strsplit(names(selectColsKG[[y]][1]),"_")[[1]][3] - newX$LE_KG <- selectColsKG[[y]][1] - newX$LE_EURO <- selectColsEURO[[y]][1] - if(length(selectColsKG[[y]])>1){ - for(j in 2:length(selectColsKG[[y]])){ - newX <- rbind(newX, - data.frame(x[y,idxNonkgeur],LE_SP=strsplit(names(selectColsKG[[y]][j]),"_")[[1]][3], - LE_KG=selectColsKG[[y]][j], - LE_EURO=selectColsEURO[[y]][j])) - } - } - return(newX)}) - xRelational <- do.call(rbind,xRelationalList) -return(xRelational)} -#eflalo2 <- eflalo2relational(eflalo) - +#' Turn eflalo dataset into relational format +#' +#' Turn the column setup of eflalo into a row setup where each species catch +#' has its own row +#' +#' May take a long time for long eflalo datasets +#' +#' @param x Dataframe with eflalo data and eflalo format +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{formatEflalo}},\code{\link{readEflalo}} +#' @references EU Lot 2 project +#' @examples +#' +#' data(eflalo) +#' eflalo <- eflalo[1:20,] +#' eflaloRel <- eflalo2relational(eflalo) +#' +#' @export eflalo2relational +eflalo2relational <- function(x){ + idxkg <- grep("KG",colnames(x)) + idxeuro <- grep("EURO",colnames(x)) + selectColsKG <- apply(x[,idxkg], 1,FUN=function(y){idx <- which(y >0); + return(y[idx])}) + selectColsEURO <- apply(x[,idxeuro],1,FUN=function(y){idx <- which(y >0); + return(y[idx])}) + + + idxNonkgeur <- which(!1:length(colnames(x)) %in% kgeur(colnames(x))) + xRelationalList <- lapply(as.list(1:length(selectColsKG)),function(y){ + newX <- data.frame(x[y,idxNonkgeur]) + newX$LE_SP <- strsplit(names(selectColsKG[[y]][1]),"_")[[1]][3] + newX$LE_KG <- selectColsKG[[y]][1] + newX$LE_EURO <- selectColsEURO[[y]][1] + if(length(selectColsKG[[y]])>1){ + for(j in 2:length(selectColsKG[[y]])){ + newX <- rbind(newX, + data.frame(x[y,idxNonkgeur],LE_SP=strsplit(names(selectColsKG[[y]][j]),"_")[[1]][3], + LE_KG=selectColsKG[[y]][j], + LE_EURO=selectColsEURO[[y]][j])) + } + } + return(newX)}) + xRelational <- do.call(rbind,xRelationalList) +return(xRelational)} +#eflalo2 <- eflalo2relational(eflalo) + diff --git a/vmstools/R/eflaloHaul2Tacsat.r b/vmstools/R/eflaloHaul2Tacsat.r index ac637c7..669252d 100644 --- a/vmstools/R/eflaloHaul2Tacsat.r +++ b/vmstools/R/eflaloHaul2Tacsat.r @@ -1,31 +1,44 @@ -eflaloHaul2Tacsat <- function(x,npoints=NULL){ - - if(is.null(npoints)==T) - npoints <- 10 - if(length(npoints)!=nrow(x)) - npoints <- rep(npoints,nrow(x)) - - x$LE_SDATIM <- as.POSIXct(paste(x$LE_CDAT,x$LE_STIME),format= "%d/%m/%Y %H:%M") - x$LE_EDATIM <- as.POSIXct(paste(x$LE_CDAT,x$LE_ETIME),format= "%d/%m/%Y %H:%M") - - #- Create tacsat template - y <- data.frame(do.call(cbind,as.list(rep(NA,9)))) - colnames(y) <- c("VE_COU","VE_REF","SI_LATI","SI_LONG","SI_DATE","SI_TIME","SI_SP","SI_HE","FT_REF") - - taLst <- 
lapply(as.list(1:nrow(x)),function(z){ - ta <- y[rep(1,npoints[z]),] - ta$VE_COU[] <- x$VE_COU[z] - ta$VE_REF[] <- x$VE_REF[z] - ta$SI_LATI[] <- seq(x$LE_SLAT[z],x$LE_ELAT[z],length.out=npoints[z]) - ta$SI_LONG[] <- seq(x$LE_SLON[z],x$LE_ELON[z],length.out=npoints[z]) - ta$SI_DATE[] <- format(seq(x$LE_SDATIM[z],x$LE_EDATIM[z],length.out=npoints[z]),"%d/%m/%Y") - ta$SI_TIME[] <- format(seq(x$LE_SDATIM[z],x$LE_EDATIM[z],length.out=npoints[z]),"%H:%M") - ta$SI_SP[] <- c(distance(x$LE_SLON[z],x$LE_SLAT[z],x$LE_ELON[z],x$LE_ELAT[z]) / 1.852) / - c(difftime(x$LE_EDATIM[z],x$LE_SDATIM[z],units="hours")) - ta$SI_HE[] <- bearing(x$LE_SLON[z],x$LE_SLAT[z],x$LE_ELON[z],x$LE_ELAT[z]) - ta$FT_REF[] <- x$FT_REF[z] - return(ta)}) - - ta <- do.call(rbind,taLst) - return(ta)} - +#' Convert eflalo (haul by haul / log-event) to tacsat format +#' +#' Convert haul by haul / log event organised eflalo data to tacsat relational +#' style +#' +#' +#' @param x eflalo formatted dataset with haul-by-haul data (e.g. +#' LE_STIME,LE_SLON) +#' @param npoints Number of intermediate points to include (defaults to 10) +#' @return Tacsat formatted dataset is returned +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{eflalo2Relational}} +#' @export eflaloHaul2Tacsat +eflaloHaul2Tacsat <- function(x,npoints=NULL){ + + if(is.null(npoints)==T) + npoints <- 10 + if(length(npoints)!=nrow(x)) + npoints <- rep(npoints,nrow(x)) + + x$LE_SDATIM <- as.POSIXct(paste(x$LE_CDAT,x$LE_STIME),format= "%d/%m/%Y %H:%M") + x$LE_EDATIM <- as.POSIXct(paste(x$LE_CDAT,x$LE_ETIME),format= "%d/%m/%Y %H:%M") + + #- Create tacsat template + y <- data.frame(do.call(cbind,as.list(rep(NA,9)))) + colnames(y) <- c("VE_COU","VE_REF","SI_LATI","SI_LONG","SI_DATE","SI_TIME","SI_SP","SI_HE","FT_REF") + + taLst <- lapply(as.list(1:nrow(x)),function(z){ + ta <- y[rep(1,npoints[z]),] + ta$VE_COU[] <- x$VE_COU[z] + ta$VE_REF[] <- x$VE_REF[z] + ta$SI_LATI[] <- seq(x$LE_SLAT[z],x$LE_ELAT[z],length.out=npoints[z]) + ta$SI_LONG[] <- seq(x$LE_SLON[z],x$LE_ELON[z],length.out=npoints[z]) + ta$SI_DATE[] <- format(seq(x$LE_SDATIM[z],x$LE_EDATIM[z],length.out=npoints[z]),"%d/%m/%Y") + ta$SI_TIME[] <- format(seq(x$LE_SDATIM[z],x$LE_EDATIM[z],length.out=npoints[z]),"%H:%M") + ta$SI_SP[] <- c(distance(x$LE_SLON[z],x$LE_SLAT[z],x$LE_ELON[z],x$LE_ELAT[z]) / 1.852) / + c(difftime(x$LE_EDATIM[z],x$LE_SDATIM[z],units="hours")) + ta$SI_HE[] <- bearing(x$LE_SLON[z],x$LE_SLAT[z],x$LE_ELON[z],x$LE_ELAT[z]) + ta$FT_REF[] <- x$FT_REF[z] + return(ta)}) + + ta <- do.call(rbind,taLst) + return(ta)} + diff --git a/vmstools/R/equalDistance.r b/vmstools/R/equalDistance.r index e881377..02ccf46 100644 --- a/vmstools/R/equalDistance.r +++ b/vmstools/R/equalDistance.r @@ -1,28 +1,62 @@ -equalDistance <- function(interpolation,res=10){ - - #Calculate ditance of all interpolations at the same time - totDist <- distanceInterpolation(interpolation) - #Get dimensions of interpolations - lngInt <- lapply(interpolation,dim) - - #Warn if resolution of equal distance is too high compared to original resolution of interpolation - if(min(unlist(lngInt)[seq(1,length(totDist),2)],na.rm=TRUE) < 9*res) warnings("Number of 
intermediate points in the interpolation might be too small for the equal distance pionts chosen") - - #Get distance steps to get equal distance - eqStep <- totDist/(res-1) - - #Get x-y values of all interpolations - intidx <- matrix(unlist(lapply(interpolation,function(x){return(x[1,])})),ncol=2,byrow=TRUE) - - #Do the calculation - result <- lapply(interpolation,function(ind){ - i <- which(intidx[,1] == ind[1,1] & intidx[,2] == ind[1,2]) - idx <- apply(abs(outer( - cumsum(distance(ind[3:lngInt[[i]][1],1],ind[3:lngInt[[i]][1],2],ind[2:(lngInt[[i]][1]-1),1],ind[2:(lngInt[[i]][1]-1),2])), - seq(eqStep[i],totDist[i],eqStep[i]), - "-")), - 2,which.min)+1 - idx <- c(1,idx) - return(ind[c(1,idx+1),])}) - #Return the equal distance interpolated set in the same format as the interpolated dataset (as a list) - return(result)} +#' Interpolated points at equal distance +#' +#' Returns the interpolated dataset with only those points remaining that are +#' at equal eucledian distance from each other, with the number of points to +#' retreive remaining. +#' +#' +#' @param interpolation interpolated dataset obtained from the interpolation() +#' function +#' @param res number of points to retreive from function +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{filterTacsat}}, \code{\link{tacsat}}, +#' \code{\link{interpolateTacsat}} +#' @references EU lot 2 project +#' @examples +#' +#' data(tacsat) +#' +#' #Sort the VMS data +#' tacsat <- sortTacsat(tacsat) +#' tacsat <- tacsat[1:1000,] +#' +#' #Filter the Tacsat data +#' tacsat <- filterTacsat(tacsat,st=c(2,6),hd=NULL) +#' +#' #Interpolate the VMS data +#' interpolation <- interpolateTacsat(tacsat,interval=120,margin=10, +#' res=100,method="cHs",params=list(fm=0.5,distscale=20, +#' sigline=0.2,st=c(2,6)),headingAdjustment=0) +#' +#' #Get a set back with only 10 points per interpolation at equal distance +#' ed_interpolation <- equalDistance(interpolation,10) +#' +#' @export equalDistance +equalDistance <- function(interpolation,res=10){ + + #Calculate ditance of all interpolations at the same time + totDist <- distanceInterpolation(interpolation) + #Get dimensions of interpolations + lngInt <- lapply(interpolation,dim) + + #Warn if resolution of equal distance is too high compared to original resolution of interpolation + if(min(unlist(lngInt)[seq(1,length(totDist),2)],na.rm=TRUE) < 9*res) warnings("Number of intermediate points in the interpolation might be too small for the equal distance pionts chosen") + + #Get distance steps to get equal distance + eqStep <- totDist/(res-1) + + #Get x-y values of all interpolations + intidx <- matrix(unlist(lapply(interpolation,function(x){return(x[1,])})),ncol=2,byrow=TRUE) + + #Do the calculation + result <- lapply(interpolation,function(ind){ + i <- which(intidx[,1] == ind[1,1] & intidx[,2] == ind[1,2]) + idx <- apply(abs(outer( + cumsum(distance(ind[3:lngInt[[i]][1],1],ind[3:lngInt[[i]][1],2],ind[2:(lngInt[[i]][1]-1),1],ind[2:(lngInt[[i]][1]-1),2])), + seq(eqStep[i],totDist[i],eqStep[i]), + "-")), + 2,which.min)+1 + idx <- c(1,idx) + return(ind[c(1,idx+1),])}) + #Return the equal distance interpolated set in the same format as the interpolated dataset (as a list) + return(result)} diff --git 
a/vmstools/R/estimatePropFishing.r b/vmstools/R/estimatePropFishing.r index 130886e..ef14f0d 100644 --- a/vmstools/R/estimatePropFishing.r +++ b/vmstools/R/estimatePropFishing.r @@ -1,243 +1,259 @@ -estimatePropFishing <- function(tacsat,eflalo,by=c("LE_GEAR","VE_REF")){ - - if(!"SI_STATE" %in% colnames(tacsat)) - stop("Provide 'SI_STATE' column in tacsat dataset") - - - #----------------------------------------------------------------------------- - #- Add columns on dates - #----------------------------------------------------------------------------- - if(!"SI_DATIM" %in% colnames(tacsat)) - tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME,sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M") - if(!"FT_DDATIM" %in% colnames(eflalo)) - eflalo$FT_DDATIM <- as.POSIXct(paste(eflalo$FT_DDAT,eflalo$FT_DTIME),format="%d/%m/%Y %H:%M",tz = "GMT") - if(!"FT_LDATIM" %in% colnames(eflalo)) - eflalo$FT_LDATIM <- as.POSIXct(paste(eflalo$FT_LDAT,eflalo$FT_LTIME),format="%d/%m/%Y %H:%M",tz = "GMT") - if(!"INTV" %in% colnames(tacsat)) - stop("Specify time interval column in tacsat (e.g. 
use intervalTacsat)") - - #- Add date notation - if("SI_DAY" %in% unique(c(by))){ - eflalo$SI_DAY <- yday(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) - tacsat$SI_DAY <- yday(tacsat$SI_DATIM) - } - if("SI_YEAR" %in% unique(c(by))){ - eflalo$SI_YEAR<- year(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) - tacsat$SI_YEAR<- year(tacsat$SI_DATIM) - } - if("SI_MONTH"%in% unique(c(by))){ - eflalo$SI_MONTH<- month(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) - tacsat$SI_MONTH<- month(tacsat$SI_DATIM) - } - if("SI_WEEK"%in% unique(c(by))){ - eflalo$SI_WEEK<- week(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) - tacsat$SI_WEEK<- week(tacsat$SI_DATIM) - } - if("SI_QUARTER"%in% unique(c(by))){ - eflalo$SI_QUARTER<- quarter(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) - tacsat$SI_QUARTER<- quarter(tacsat$SI_DATIM) - } - - #- Select time levels + order these - timePos<- c("SI_DAY","SI_WEEK","SI_MONTH","SI_QUARTER","SI_YEAR") - byTime <- by[which(by %in% timePos)] - byTime <- timePos[which(timePos %in% byTime)] - - #----------------------------------------------------------------------------- - #-Add spatial location - #----------------------------------------------------------------------------- - if("LE_RECT" %in% by) - tacsat$LE_RECT <- ICESrectangle(tacsat) - if("LE_ICESAREA" %in% by){ - data(ICESareas) - tacsat$LE_AREA <- ICESarea(tacsat,ICESareas) - tacsat$LE_AREA[which(is.na(tacsat$LE_AREA)==T)] <- "OTHER" - eflonlat <- ICESrectangle2LonLat(eflalo$LE_RECT) - eflalo$LE_AREA <- ICESarea(eflonlat,ICESareas) - eflalo$LE_AREA[which(is.na(eflalo$LE_AREA)==T)] <- "OTHER" - } - - #- Select area levels + order these - areaPos<- c("LE_RECT","LE_ICESAREA") - byArea <- by[which(by %in% areaPos)] - byArea <- areaPos[which(areaPos %in% byArea)] - - #----------------------------------------------------------------------------- - #- Calculate time possible to spend per day fishing - #----------------------------------------------------------------------------- - eflalo$LE_CDATIM <- 
as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y",tz="GMT") - eflalo$INTV <- c(difftime(eflalo$FT_LDATIM,eflalo$FT_DDATIM,units="mins")) - eflalo$FT_DURDAY <- ifelse(c(difftime(as.Date(eflalo$FT_LDATIM),as.Date(eflalo$FT_DDATIM),units="hours") == 0), - c(difftime(eflalo$FT_LDATIM,eflalo$FT_DDATIM,units="mins")), - ifelse(c(difftime(as.Date(eflalo$FT_DDATIM),as.Date(eflalo$LE_CDATIM),units="hours")==0), - c(difftime(eflalo$LE_CDATIM+(60*60*24),eflalo$FT_DDATIM,units="mins")), - ifelse(c(difftime(as.Date(eflalo$FT_LDATIM),as.Date(eflalo$LE_CDATIM),units="hours")==0), - c(difftime(eflalo$FT_LDATIM,eflalo$LE_CDATIM,units="mins")), - 1440))) - - # Here there is still a problem because INTVDAY is calculated for catch days only, so you miss some effort of a whole trip - eflalo$dummy <- 1 - eflalo <- merge(eflalo,aggregate(eflalo$dummy,by=list(eflalo$FT_REF,eflalo$LE_CDATIM),FUN=sum,na.rm=T),by.x=c("FT_REF","LE_CDATIM"),by.y=c("Group.1","Group.2"),all.x=T) - colnames(eflalo)[length(colnames(eflalo))] <- "NR_FT_REF" - if("SI_DAY" %in% by){ - eflalo$INTVDAY <- eflalo$FT_DURDAY / eflalo$NR_FT_REF - } else { - eflalo$INTVDAY <- eflalo$INTV / eflalo$NR_FT_REF - } - eflalo <- eflalo[,-grep("dummy",colnames(eflalo))] - eflalo <- eflalo[,-grep("FT_DURDAY",colnames(eflalo))] - eflalo <- eflalo[,-grep("NR_FT_REF",colnames(eflalo))] - - #----------------------------------------------------------------------------- - #- Check if all colums in both tacsat and eflalo are available - #----------------------------------------------------------------------------- - if(length(which(!by %in% colnames(tacsat) | !by %in% colnames(eflalo)))>0) - stop("elements specified in 'by' are not available as columns in either tacsat or eflalo") - - #----------------------------------------------------------------------------- - #- Estimate proportion fishing? 
(estimate on a basis which proportion of being out of harbour - # is used to fish and use that proportion to rescale eflalo effort) - #----------------------------------------------------------------------------- - - subTacsat <- subset(tacsat,SI_STATE != 0 & FT_REF != 0) - subEflalo <- eflalo - - if("SI_DAY" %in% by){ - subEflalo$INTVTRIP <- subEflalo$INTV - subEflalo$INTV <- subEflalo$INTVDAY - } else { - subEflalo <- subEflalo[!duplicated(subEflalo$FT_REF),] - } - if("LE_GEAR" %in% by & !"LE_GEAR" %in% colnames(subTacsat)) - subTacsat$LE_GEAR <- subEflalo$LE_GEAR[match(subTacsat$FT_REF,subEflalo$FT_REF)] - if(!"FT_REF" %in% colnames(subTacsat)) - stop("FT_REF column missing from tacsat") - - #- Calculate effort in tacsat and eflalo on a trip basis - if(!"FT_REF" %in% by){ - by <- c(by,"FT_REF") - warning("FT_REF has been added to 'by' statement") - } - byOrig <- by - idxE <- 1:nrow(eflalo) - idxEs <- 1:nrow(subEflalo) - idxTs <- 1:nrow(subTacsat) - - #- First go: aggregate INTV on specified level and compare with each other - if(length(by)>1){ - estEffTacsat <- aggregate(subTacsat$INTV[idxTs],by=as.list(subTacsat[idxTs,by]),FUN=sum,na.rm=T) - estEffEflalo <- aggregate(subEflalo$INTV[idxEs],by=as.list(subEflalo[idxEs,by]),FUN=sum,na.rm=T) - } else { - estEffTacsat <- aggregate(subTacsat$INTV[idxTs],by=list(subTacsat[idxTs,by]),FUN=sum,na.rm=T) - estEffEflalo <- aggregate(subEflalo$INTV[idxEs],by=list(subEflalo[idxEs,by]),FUN=sum,na.rm=T) - } - colnames(estEffTacsat) <- c(by,"INTV") - colnames(estEffEflalo) <- c(by,"INTV") - - #- Reset to original estimPropFish - mestEff <- merge(estEffTacsat,estEffEflalo,by=c(by)) - allGZero <- which(mestEff$INTV.x>0 & mestEff$INTV.y>0) - by <- by[-grep("FT_REF",by)] - - #- Calculate estimated proportion of fishing for each eflalo ping - if(length(by)>1){ - estimFish <- aggregate(log(mestEff$INTV.x[allGZero] / mestEff$INTV.y[allGZero]), - by=as.list(mestEff[allGZero,by]), - FUN=mean,na.rm=T) - } else { - estimFish <- 
aggregate(log(mestEff$INTV.x[allGZero] / mestEff$INTV.y[allGZero]), - by=list(mestEff[allGZero,by]), - FUN=mean,na.rm=T) - } - estimFish$x <- exp(estimFish$x) - colnames(estimFish) <- c(by,"x") - allGZero <- which(mestEff$INTV.x>0 & mestEff$INTV.y>0) - allMean <- exp(mean(log(mestEff$INTV.x[allGZero] / mestEff$INTV.y[allGZero]))) - - #- Add fishing proportion parameter to eflalo (x) - eflalo$x[idxE] <- merge(eflalo[ idxE, ],estimFish,by=by,all.x=T)$x - subEflalo$x[idxEs] <- merge(subEflalo[idxEs,],estimFish,by=by,all.x=T)$x - idxE <- which(is.na( eflalo$x)==T) - idxEs <- which(is.na(subEflalo$x)==T) - idxTs <- which(!apply(t(t(subTacsat[,by])),1,paste,collapse="_") %in% - apply(t(t(eflalo[which(is.na(eflalo$x)==F),by])),1,paste,collapse="_")) - - #- Second go: aggregate INTV on specified level and compare with each other but drop one element time at time - if(length(idxTs)>0 & length(idxEs)>0){ - if(length(by)>1){ - for(i in 1:(length(by)-1)){ - by <- rev(rev(byOrig[-grep("FT_REF",byOrig)])[-c(1:i)]) - by <- c(by,"FT_REF") - - if(!"SI_DAY" %in% by){ - subEflalo <- subEflalo[!duplicated(subEflalo$FT_REF),] - subEflalo$INTV <- subEflalo$INTVTRIP - } - - if(length(by)>1){ - estEffTacsat <- aggregate(subTacsat$INTV[idxTs],by=as.list(subTacsat[idxTs,by]),FUN=sum,na.rm=T) - estEffEflalo <- aggregate(subEflalo$INTV[idxEs],by=as.list(subEflalo[idxEs,by]),FUN=sum,na.rm=T) - } else { - estEffTacsat <- aggregate(subTacsat$INTV[idxTs],by=list(subTacsat[idxTs,by]),FUN=sum,na.rm=T) - estEffEflalo <- aggregate(subEflalo$INTV[idxEs],by=list(subEflalo[idxEs,by]),FUN=sum,na.rm=T) - } - colnames(estEffEflalo) <- c(by,"INTV") - colnames(estEffTacsat) <- c(by,"INTV") - - mestEff <- merge(estEffTacsat[,colnames(estEffEflalo)],estEffEflalo,by=c(by)) - allGZero <- which(mestEff$INTV.x>0 & mestEff$INTV.y>0) - - #- Reset to original estimPropFish - by <- by[-grep("FT_REF",by)] - if(length(by)>1){ - estimFish <- aggregate(log(mestEff$INTV.x[allGZero] / mestEff$INTV.y[allGZero]), - 
by=as.list(mestEff[allGZero,by]), - FUN=mean,na.rm=T) - } else { - estimFish <- aggregate(log(mestEff$INTV.x[allGZero] / mestEff$INTV.y[allGZero]), - by=list(mestEff[allGZero,by]), - FUN=mean,na.rm=T) - } - estimFish$x <- exp(estimFish$x) - colnames(estimFish) <- c(by,"x") - eflalo$x[idxE] <- merge(eflalo [idxE, -grep("x",colnames( eflalo))],estimFish,by=by,all.x=T)$x - subEflalo$x[idxEs] <- merge(subEflalo[idxEs,-grep("x",colnames(subEflalo))],estimFish,by=by,all.x=T)$x - idxE <- which(is.na( eflalo$x)==T) - idxEs <- which(is.na(subEflalo$x)==T) - idxTs <- which(!apply(t(t(subTacsat[,by])),1,paste,collapse="_") %in% - apply(t(t(eflalo[which(is.na(eflalo$x)==F),by])),1,paste,collapse="_")) - } - } - } - #- Third go: aggregate INTV on FT_REF level only - if(length(idxEs)>0 & length(idxTs)>0){ - if(!"SI_DAY" %in% by){ - subEflalo <- subEflalo[!duplicated(subEflalo$FT_REF),] - subEflalo$INTV <- subEflalo$INTVTRIP - } - estEffTacsat <- aggregate(subTacsat$INTV[idxTs],by=list(subTacsat[idxTs,"FT_REF"]),FUN=sum,na.rm=T) - estEffEflalo <- aggregate(subEflalo$INTV[idxEs],by=list(subEflalo[idxEs,"FT_REF"]),FUN=sum,na.rm=T) - colnames(estEffEflalo) <- c("FT_REF","INTV") - colnames(estEffTacsat) <- c("FT_REF","INTV") - mestEff <- merge(estEffTacsat[,colnames(estEffEflalo)],estEffEflalo,by=c("FT_REF")) - allGZero <- which(mestEff$INTV.x>0 & mestEff$INTV.y>0) - estimFish <- aggregate(log(mestEff$INTV.x[allGZero] / mestEff$INTV.y[allGZero]), - by=list(mestEff[allGZero,"FT_REF"]), - FUN=mean,na.rm=T) - estimFish$x <- exp(estimFish$x) - colnames(estimFish) <- c("FT_REF","x") - eflalo$x[idxE] <- merge(eflalo [idxE, -grep("x",colnames( eflalo))],estimFish,by="FT_REF",all.x=T)$x - subEflalo$x[idxEs] <- merge(subEflalo[idxEs,-grep("x",colnames(subEflalo))],estimFish,by="FT_REF",all.x=T)$x - idxE <- which(is.na( eflalo$x)==T) - idxEs <- which(is.na(subEflalo$x)==T) - idxTs <- which(!apply(t(t(subTacsat[,by])),1,paste,collapse="_") %in% - 
apply(t(t(eflalo[which(is.na(eflalo$x)==F),by])),1,paste,collapse="_")) - } - if(length(idxEs)>0){ - subEflalo$x[idxEs] <- allMean - eflalo$x[idxE] <- allMean - colnames(subEflalo)[length(colnames(subEflalo))] <- "PropFish" - colnames(eflalo)[ length(colnames( eflalo))] <- "PropFish" - } - colnames(eflalo)[ length(colnames( eflalo))] <- "PropFish" -return(eflalo)} +#' Estimate the proportion fishing per fishing trip +#' +#' Estimate what proportion of time a fishing vessel is fishing when outside of +#' harbour +#' +#' +#' @param tacsat tacsat dataset, including FT_REF column +#' @param eflalo eflalo dataset +#' @param by column names to calculate the estimation over (e.g. LE_GEAR to +#' estimate proportion fishing by gear or VE_REF to estimate proportion fishing +#' by vessel ID) +#' @return Returns the eflalo dataset including a new column 'propFish' which +#' provides the estimated proportion fishing for each entry +#' @author Niels T. Hintzen +#' @seealso \code{\link{raiseTacsat}} +#' @export estimatePropFishing +estimatePropFishing <- function(tacsat,eflalo,by=c("LE_GEAR","VE_REF")){ + + if(!"SI_STATE" %in% colnames(tacsat)) + stop("Provide 'SI_STATE' column in tacsat dataset") + + + #----------------------------------------------------------------------------- + #- Add columns on dates + #----------------------------------------------------------------------------- + if(!"SI_DATIM" %in% colnames(tacsat)) + tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME,sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M") + if(!"FT_DDATIM" %in% colnames(eflalo)) + eflalo$FT_DDATIM <- as.POSIXct(paste(eflalo$FT_DDAT,eflalo$FT_DTIME),format="%d/%m/%Y %H:%M",tz = "GMT") + if(!"FT_LDATIM" %in% colnames(eflalo)) + eflalo$FT_LDATIM <- as.POSIXct(paste(eflalo$FT_LDAT,eflalo$FT_LTIME),format="%d/%m/%Y %H:%M",tz = "GMT") + if(!"INTV" %in% colnames(tacsat)) + stop("Specify time interval column in tacsat (e.g. 
use intervalTacsat)") + + #- Add date notation + if("SI_DAY" %in% unique(c(by))){ + eflalo$SI_DAY <- yday(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) + tacsat$SI_DAY <- yday(tacsat$SI_DATIM) + } + if("SI_YEAR" %in% unique(c(by))){ + eflalo$SI_YEAR<- year(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) + tacsat$SI_YEAR<- year(tacsat$SI_DATIM) + } + if("SI_MONTH"%in% unique(c(by))){ + eflalo$SI_MONTH<- month(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) + tacsat$SI_MONTH<- month(tacsat$SI_DATIM) + } + if("SI_WEEK"%in% unique(c(by))){ + eflalo$SI_WEEK<- week(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) + tacsat$SI_WEEK<- week(tacsat$SI_DATIM) + } + if("SI_QUARTER"%in% unique(c(by))){ + eflalo$SI_QUARTER<- quarter(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) + tacsat$SI_QUARTER<- quarter(tacsat$SI_DATIM) + } + + #- Select time levels + order these + timePos<- c("SI_DAY","SI_WEEK","SI_MONTH","SI_QUARTER","SI_YEAR") + byTime <- by[which(by %in% timePos)] + byTime <- timePos[which(timePos %in% byTime)] + + #----------------------------------------------------------------------------- + #-Add spatial location + #----------------------------------------------------------------------------- + if("LE_RECT" %in% by) + tacsat$LE_RECT <- ICESrectangle(tacsat) + if("LE_ICESAREA" %in% by){ + data(ICESareas) + tacsat$LE_AREA <- ICESarea(tacsat,ICESareas) + tacsat$LE_AREA[which(is.na(tacsat$LE_AREA)==T)] <- "OTHER" + eflonlat <- ICESrectangle2LonLat(eflalo$LE_RECT) + eflalo$LE_AREA <- ICESarea(eflonlat,ICESareas) + eflalo$LE_AREA[which(is.na(eflalo$LE_AREA)==T)] <- "OTHER" + } + + #- Select area levels + order these + areaPos<- c("LE_RECT","LE_ICESAREA") + byArea <- by[which(by %in% areaPos)] + byArea <- areaPos[which(areaPos %in% byArea)] + + #----------------------------------------------------------------------------- + #- Calculate time possible to spend per day fishing + #----------------------------------------------------------------------------- + eflalo$LE_CDATIM <- 
as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y",tz="GMT") + eflalo$INTV <- c(difftime(eflalo$FT_LDATIM,eflalo$FT_DDATIM,units="mins")) + eflalo$FT_DURDAY <- ifelse(c(difftime(as.Date(eflalo$FT_LDATIM),as.Date(eflalo$FT_DDATIM),units="hours") == 0), + c(difftime(eflalo$FT_LDATIM,eflalo$FT_DDATIM,units="mins")), + ifelse(c(difftime(as.Date(eflalo$FT_DDATIM),as.Date(eflalo$LE_CDATIM),units="hours")==0), + c(difftime(eflalo$LE_CDATIM+(60*60*24),eflalo$FT_DDATIM,units="mins")), + ifelse(c(difftime(as.Date(eflalo$FT_LDATIM),as.Date(eflalo$LE_CDATIM),units="hours")==0), + c(difftime(eflalo$FT_LDATIM,eflalo$LE_CDATIM,units="mins")), + 1440))) + + # Here there is still a problem because INTVDAY is calculated for catch days only, so you miss some effort of a whole trip + eflalo$dummy <- 1 + eflalo <- merge(eflalo,aggregate(eflalo$dummy,by=list(eflalo$FT_REF,eflalo$LE_CDATIM),FUN=sum,na.rm=T),by.x=c("FT_REF","LE_CDATIM"),by.y=c("Group.1","Group.2"),all.x=T) + colnames(eflalo)[length(colnames(eflalo))] <- "NR_FT_REF" + if("SI_DAY" %in% by){ + eflalo$INTVDAY <- eflalo$FT_DURDAY / eflalo$NR_FT_REF + } else { + eflalo$INTVDAY <- eflalo$INTV / eflalo$NR_FT_REF + } + eflalo <- eflalo[,-grep("dummy",colnames(eflalo))] + eflalo <- eflalo[,-grep("FT_DURDAY",colnames(eflalo))] + eflalo <- eflalo[,-grep("NR_FT_REF",colnames(eflalo))] + + #----------------------------------------------------------------------------- + #- Check if all colums in both tacsat and eflalo are available + #----------------------------------------------------------------------------- + if(length(which(!by %in% colnames(tacsat) | !by %in% colnames(eflalo)))>0) + stop("elements specified in 'by' are not available as columns in either tacsat or eflalo") + + #----------------------------------------------------------------------------- + #- Estimate proportion fishing? 
(estimate on a basis which proportion of being out of harbour + # is used to fish and use that proportion to rescale eflalo effort) + #----------------------------------------------------------------------------- + + subTacsat <- subset(tacsat,SI_STATE != 0 & FT_REF != 0) + subEflalo <- eflalo + + if("SI_DAY" %in% by){ + subEflalo$INTVTRIP <- subEflalo$INTV + subEflalo$INTV <- subEflalo$INTVDAY + } else { + subEflalo <- subEflalo[!duplicated(subEflalo$FT_REF),] + } + if("LE_GEAR" %in% by & !"LE_GEAR" %in% colnames(subTacsat)) + subTacsat$LE_GEAR <- subEflalo$LE_GEAR[match(subTacsat$FT_REF,subEflalo$FT_REF)] + if(!"FT_REF" %in% colnames(subTacsat)) + stop("FT_REF column missing from tacsat") + + #- Calculate effort in tacsat and eflalo on a trip basis + if(!"FT_REF" %in% by){ + by <- c(by,"FT_REF") + warning("FT_REF has been added to 'by' statement") + } + byOrig <- by + idxE <- 1:nrow(eflalo) + idxEs <- 1:nrow(subEflalo) + idxTs <- 1:nrow(subTacsat) + + #- First go: aggregate INTV on specified level and compare with each other + if(length(by)>1){ + estEffTacsat <- aggregate(subTacsat$INTV[idxTs],by=as.list(subTacsat[idxTs,by]),FUN=sum,na.rm=T) + estEffEflalo <- aggregate(subEflalo$INTV[idxEs],by=as.list(subEflalo[idxEs,by]),FUN=sum,na.rm=T) + } else { + estEffTacsat <- aggregate(subTacsat$INTV[idxTs],by=list(subTacsat[idxTs,by]),FUN=sum,na.rm=T) + estEffEflalo <- aggregate(subEflalo$INTV[idxEs],by=list(subEflalo[idxEs,by]),FUN=sum,na.rm=T) + } + colnames(estEffTacsat) <- c(by,"INTV") + colnames(estEffEflalo) <- c(by,"INTV") + + #- Reset to original estimPropFish + mestEff <- merge(estEffTacsat,estEffEflalo,by=c(by)) + allGZero <- which(mestEff$INTV.x>0 & mestEff$INTV.y>0) + by <- by[-grep("FT_REF",by)] + + #- Calculate estimated proportion of fishing for each eflalo ping + if(length(by)>1){ + estimFish <- aggregate(log(mestEff$INTV.x[allGZero] / mestEff$INTV.y[allGZero]), + by=as.list(mestEff[allGZero,by]), + FUN=mean,na.rm=T) + } else { + estimFish <- 
aggregate(log(mestEff$INTV.x[allGZero] / mestEff$INTV.y[allGZero]), + by=list(mestEff[allGZero,by]), + FUN=mean,na.rm=T) + } + estimFish$x <- exp(estimFish$x) + colnames(estimFish) <- c(by,"x") + allGZero <- which(mestEff$INTV.x>0 & mestEff$INTV.y>0) + allMean <- exp(mean(log(mestEff$INTV.x[allGZero] / mestEff$INTV.y[allGZero]))) + + #- Add fishing proportion parameter to eflalo (x) + eflalo$x[idxE] <- merge(eflalo[ idxE, ],estimFish,by=by,all.x=T)$x + subEflalo$x[idxEs] <- merge(subEflalo[idxEs,],estimFish,by=by,all.x=T)$x + idxE <- which(is.na( eflalo$x)==T) + idxEs <- which(is.na(subEflalo$x)==T) + idxTs <- which(!apply(t(t(subTacsat[,by])),1,paste,collapse="_") %in% + apply(t(t(eflalo[which(is.na(eflalo$x)==F),by])),1,paste,collapse="_")) + + #- Second go: aggregate INTV on specified level and compare with each other but drop one element time at time + if(length(idxTs)>0 & length(idxEs)>0){ + if(length(by)>1){ + for(i in 1:(length(by)-1)){ + by <- rev(rev(byOrig[-grep("FT_REF",byOrig)])[-c(1:i)]) + by <- c(by,"FT_REF") + + if(!"SI_DAY" %in% by){ + subEflalo <- subEflalo[!duplicated(subEflalo$FT_REF),] + subEflalo$INTV <- subEflalo$INTVTRIP + } + + if(length(by)>1){ + estEffTacsat <- aggregate(subTacsat$INTV[idxTs],by=as.list(subTacsat[idxTs,by]),FUN=sum,na.rm=T) + estEffEflalo <- aggregate(subEflalo$INTV[idxEs],by=as.list(subEflalo[idxEs,by]),FUN=sum,na.rm=T) + } else { + estEffTacsat <- aggregate(subTacsat$INTV[idxTs],by=list(subTacsat[idxTs,by]),FUN=sum,na.rm=T) + estEffEflalo <- aggregate(subEflalo$INTV[idxEs],by=list(subEflalo[idxEs,by]),FUN=sum,na.rm=T) + } + colnames(estEffEflalo) <- c(by,"INTV") + colnames(estEffTacsat) <- c(by,"INTV") + + mestEff <- merge(estEffTacsat[,colnames(estEffEflalo)],estEffEflalo,by=c(by)) + allGZero <- which(mestEff$INTV.x>0 & mestEff$INTV.y>0) + + #- Reset to original estimPropFish + by <- by[-grep("FT_REF",by)] + if(length(by)>1){ + estimFish <- aggregate(log(mestEff$INTV.x[allGZero] / mestEff$INTV.y[allGZero]), + 
by=as.list(mestEff[allGZero,by]), + FUN=mean,na.rm=T) + } else { + estimFish <- aggregate(log(mestEff$INTV.x[allGZero] / mestEff$INTV.y[allGZero]), + by=list(mestEff[allGZero,by]), + FUN=mean,na.rm=T) + } + estimFish$x <- exp(estimFish$x) + colnames(estimFish) <- c(by,"x") + eflalo$x[idxE] <- merge(eflalo [idxE, -grep("x",colnames( eflalo))],estimFish,by=by,all.x=T)$x + subEflalo$x[idxEs] <- merge(subEflalo[idxEs,-grep("x",colnames(subEflalo))],estimFish,by=by,all.x=T)$x + idxE <- which(is.na( eflalo$x)==T) + idxEs <- which(is.na(subEflalo$x)==T) + idxTs <- which(!apply(t(t(subTacsat[,by])),1,paste,collapse="_") %in% + apply(t(t(eflalo[which(is.na(eflalo$x)==F),by])),1,paste,collapse="_")) + } + } + } + #- Third go: aggregate INTV on FT_REF level only + if(length(idxEs)>0 & length(idxTs)>0){ + if(!"SI_DAY" %in% by){ + subEflalo <- subEflalo[!duplicated(subEflalo$FT_REF),] + subEflalo$INTV <- subEflalo$INTVTRIP + } + estEffTacsat <- aggregate(subTacsat$INTV[idxTs],by=list(subTacsat[idxTs,"FT_REF"]),FUN=sum,na.rm=T) + estEffEflalo <- aggregate(subEflalo$INTV[idxEs],by=list(subEflalo[idxEs,"FT_REF"]),FUN=sum,na.rm=T) + colnames(estEffEflalo) <- c("FT_REF","INTV") + colnames(estEffTacsat) <- c("FT_REF","INTV") + mestEff <- merge(estEffTacsat[,colnames(estEffEflalo)],estEffEflalo,by=c("FT_REF")) + allGZero <- which(mestEff$INTV.x>0 & mestEff$INTV.y>0) + estimFish <- aggregate(log(mestEff$INTV.x[allGZero] / mestEff$INTV.y[allGZero]), + by=list(mestEff[allGZero,"FT_REF"]), + FUN=mean,na.rm=T) + estimFish$x <- exp(estimFish$x) + colnames(estimFish) <- c("FT_REF","x") + eflalo$x[idxE] <- merge(eflalo [idxE, -grep("x",colnames( eflalo))],estimFish,by="FT_REF",all.x=T)$x + subEflalo$x[idxEs] <- merge(subEflalo[idxEs,-grep("x",colnames(subEflalo))],estimFish,by="FT_REF",all.x=T)$x + idxE <- which(is.na( eflalo$x)==T) + idxEs <- which(is.na(subEflalo$x)==T) + idxTs <- which(!apply(t(t(subTacsat[,by])),1,paste,collapse="_") %in% + 
apply(t(t(eflalo[which(is.na(eflalo$x)==F),by])),1,paste,collapse="_")) + } + if(length(idxEs)>0){ + subEflalo$x[idxEs] <- allMean + eflalo$x[idxE] <- allMean + colnames(subEflalo)[length(colnames(subEflalo))] <- "PropFish" + colnames(eflalo)[ length(colnames( eflalo))] <- "PropFish" + } + colnames(eflalo)[ length(colnames( eflalo))] <- "PropFish" +return(eflalo)} diff --git a/vmstools/R/extractTableMainSpecies.r b/vmstools/R/extractTableMainSpecies.r index 9297ca2..cd78048 100644 --- a/vmstools/R/extractTableMainSpecies.r +++ b/vmstools/R/extractTableMainSpecies.r @@ -1,72 +1,149 @@ -################################################################ -# STEP 1 OF THE MULTIVARIATE CLASSIFICATION : # -# SELECT MAIN SPECIES OF THE DATASET # -# AND # -# EXTRACT A NEW DATASET WITH ONLY THE MAIN SPECIES # -################################################################ - - -extractTableMainSpecies = function(dat,namesMainSpeciesHAC,paramTotal=95,paramLogevent=100){ - - print("######## STEP 1 COMBINATION OF MAIN SPECIES FROM THE THREE EXPLORATORY METHODS ########") - t1 <- Sys.time() - - # TOTALE - - p=ncol(dat) # Number of species +1 - print("Calculating proportions...") - propdat=transformation_proportion(dat[,2:p]) - namesSpecies=colnames(propdat) - - # Total quantity caught species by species - sumcol=rep(as.numeric(NA),p-1) #numeric() - for(i in 2:p) sumcol[i-1]=sum(dat[,i]) - names(sumcol)=namesSpecies - - # Percent of each species in the total catch - propSp=sumcol/sum(sumcol,na.rm=TRUE)*100 - # Columns number of each species by decreasing order of capture - numSp=order(propSp,decreasing=TRUE) - # Percent of each species in the total catch by decreasing order - propSp=cumsum(propSp[order(propSp,decreasing=TRUE)]) - - Store(objects()) - gc(reset=TRUE) - - # We are taking all species until having at least param1% of total catch - if (is.null(paramTotal) | !is.numeric(paramTotal)) stop("param1 must be numeric between 0 and 100") - threshold=paramTotal - 
pourcent=which(propSp<=threshold) - # We are taking the name of selected species - selSpecies=numSp[1:(length(pourcent)+1)] - namesSelSpeciesTotal=namesSpecies[selSpecies] - - - - # LOGEVENT - - if (is.null(paramLogevent) | !is.numeric(paramLogevent)) stop("paramLogevent must be numeric between 0 and 100") - - threshold=paramLogevent - # Selection of species making up over param1% of logevent's captures - namesSelSpeciesLogevent=character() - for (i in namesSpecies) { - if (!is.na(any(propdat[,i]>=threshold)) && any(propdat[,i]>=threshold)) namesSelSpeciesLogevent=c(namesSelSpeciesLogevent,i) - } - - # Merge with explospecies - listSpeciesAll <- sort(unique(c(namesMainSpeciesHAC,namesSelSpeciesTotal,namesSelSpeciesLogevent))) - listSpeciesAll <- listSpeciesAll[!listSpeciesAll=="MZZ"] - - # We are building the table with main species and aggregated other species - datSpecies=building_tab_pca(propdat,listSpeciesAll) - rownames(datSpecies)=dat[,1] - - - print(" --- end of step 1 ---") - print(Sys.time()-t1) - - return(datSpecies) - -} - +################################################################ +# STEP 1 OF THE MULTIVARIATE CLASSIFICATION : # +# SELECT MAIN SPECIES OF THE DATASET # +# AND # +# EXTRACT A NEW DATASET WITH ONLY THE MAIN SPECIES # +################################################################ + + + + +#' Finding Metiers from a reduced EFLALO dataset, step 1 : selecting the +#' species considered as important for the analysis of target assemblages, by +#' crossing three different approaches. +#' +#' This function represents the first step in the multivariate analysis of +#' logbooks data for identifying metiers. +#' +#' A typical logbook dataset will contain a large number of species recorded, +#' but only a limited number of these could potentially be considered as target +#' species. 
This function builds further on the exploration conducted with the +#' function selectMainSpecies() by combining three different approaches : - HAC +#' (Hierarchical Ascending Classification) based on Euclidian distances between +#' species with Ward aggregating criteria; - Total, where species are ranked +#' based on their proportion in the total catches, and those cumulating to a +#' given percentage are retained - Logevent, where species are selected if they +#' represent at least a given percentage of at least one logevent (one line) +#' +#' All the three methods work on catch data transformed in percentage of +#' species by logevent (line), in order to remove the effect of large hauls +#' compared to small hauls. The HAC method is not run in this function, but in +#' the selectMainSpecies() function, which outputs are directly used here. In +#' addition, the relevant thresholds for the two other methods Total and +#' Logevent must be selected. +#' +#' +#' @param dat a data frame reduced from an eflalo format. It should contain +#' only the LE_ID (Logevent ID) variable as well as all species names in +#' columns, with raw catch data. It is necessary to sort out potential +#' error-prone lines (such as lines with only 0) prior to the analysis, and to +#' replace NA values by 0. +#' @param namesMainSpeciesHAC character. The names of species retained by the +#' HAC method selectMainSpecies() function. If omitted, only the two other +#' methods will be used for selecting species. +#' @param paramTotal numeric. The percentage threshold for the Total method. +#' All species ranked in terms of percentage in total catches and cumulating up +#' to this percentage value will be retained. Default value is 95\%. +#' @param paramLogevent numeric. The percentage threshold for the Logevent +#' method. All species representing at least this threshold of at least one +#' logevent (one line) will be retained. 
Default value is 100\% (logevents +#' catching one and only one species). +#' @return The function returns a numerical matrix with Logevents as lines and +#' species as columns, with percentage values (between 0 and 100) of each +#' species in the logevent catches. Logevent ID (LE_ID) are returned as row +#' names. +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @seealso \code{\link{selectMainSpecies}} +#' @references Development of tools for logbook and VMS data analysis. Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @examples +#' +#' +#' data(eflalo) +#' +#' eflalo <- formatEflalo(eflalo) +#' +#' eflalo <- eflalo[eflalo$LE_GEAR=="OTB",] +#' # note that output plots will be sent to getwd() +#' analysisName <- "metier_analysis_OTB" +#' +#' dat <- eflalo[,c("LE_ID",grep("EURO",colnames(eflalo),value=TRUE))] +#' names(dat)[-1] <- unlist(lapply(strsplit(names(dat[,-1]),"_"),function(x) x[[3]])) +#' +#' explo <- selectMainSpecies(dat, analysisName, RunHAC=TRUE, DiagFlag=FALSE) +#' #=> send the LE_ID and LE_EURO_SP columns only +#' +#' Step1 <- extractTableMainSpecies(dat, explo$namesMainSpeciesHAC, +#' paramTotal=95, paramLogevent=100) +#' #=> send the LE_ID and LE_EURO_SP columns only +#' +#' +#' @export extractTableMainSpecies +extractTableMainSpecies = function(dat,namesMainSpeciesHAC,paramTotal=95,paramLogevent=100){ + + print("######## STEP 1 COMBINATION OF MAIN SPECIES FROM THE THREE EXPLORATORY METHODS ########") + t1 <- Sys.time() + + # TOTALE + + p=ncol(dat) # Number of species +1 + print("Calculating proportions...") + propdat=transformation_proportion(dat[,2:p]) + namesSpecies=colnames(propdat) + + # Total quantity caught species by species + 
sumcol=rep(as.numeric(NA),p-1) #numeric() + for(i in 2:p) sumcol[i-1]=sum(dat[,i]) + names(sumcol)=namesSpecies + + # Percent of each species in the total catch + propSp=sumcol/sum(sumcol,na.rm=TRUE)*100 + # Columns number of each species by decreasing order of capture + numSp=order(propSp,decreasing=TRUE) + # Percent of each species in the total catch by decreasing order + propSp=cumsum(propSp[order(propSp,decreasing=TRUE)]) + + Store(objects()) + gc(reset=TRUE) + + # We are taking all species until having at least param1% of total catch + if (is.null(paramTotal) | !is.numeric(paramTotal)) stop("param1 must be numeric between 0 and 100") + threshold=paramTotal + pourcent=which(propSp<=threshold) + # We are taking the name of selected species + selSpecies=numSp[1:(length(pourcent)+1)] + namesSelSpeciesTotal=namesSpecies[selSpecies] + + + + # LOGEVENT + + if (is.null(paramLogevent) | !is.numeric(paramLogevent)) stop("paramLogevent must be numeric between 0 and 100") + + threshold=paramLogevent + # Selection of species making up over param1% of logevent's captures + namesSelSpeciesLogevent=character() + for (i in namesSpecies) { + if (!is.na(any(propdat[,i]>=threshold)) && any(propdat[,i]>=threshold)) namesSelSpeciesLogevent=c(namesSelSpeciesLogevent,i) + } + + # Merge with explospecies + listSpeciesAll <- sort(unique(c(namesMainSpeciesHAC,namesSelSpeciesTotal,namesSelSpeciesLogevent))) + listSpeciesAll <- listSpeciesAll[!listSpeciesAll=="MZZ"] + + # We are building the table with main species and aggregated other species + datSpecies=building_tab_pca(propdat,listSpeciesAll) + rownames(datSpecies)=dat[,1] + + + print(" --- end of step 1 ---") + print(Sys.time()-t1) + + return(datSpecies) + +} + diff --git a/vmstools/R/filterTacsat.r b/vmstools/R/filterTacsat.r index 3088583..20f4f3c 100644 --- a/vmstools/R/filterTacsat.r +++ b/vmstools/R/filterTacsat.r @@ -1,37 +1,70 @@ -`filterTacsat` <- -function(tacsat - ,st=c(2,6) #Speed threshold points (two values), NULL means 
use all points - ,hd=NULL #Heading threshold points (two values), NULL means use all points - ,remDup=TRUE #Specify if you want to remove duplicated VMS records (these should not occur in the first place) - ){ - VMS <- tacsat -if(!"SI_DATIM" %in% colnames(VMS)) VMS$SI_DATIM <- as.POSIXct(paste(VMS$SI_DATE, VMS$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") - #Remove duplicate records -if(is.null(remDup)==FALSE){ - uniqueVMS <- which(duplicated(VMS[,c("VE_REF","SI_DATIM")])==FALSE) - VMS. <- VMS[uniqueVMS,] - if(dim(VMS.)[1] != dim(VMS)[1]) warning("duplicate records have been removed") -} else { - VMS. <- VMS - } - -if(is.null(hd)==FALSE){ - warning("It is assumed that VMS data is sorted by vessel and date") - #Calculate the difference in heading between succeeding VMS datapoints - diffHeading <- abs(VMS.$SI_HE[2:dim(VMS.)[1]] - VMS.$SI_HE[1:(dim(VMS.)[1]-1)]) - #If there is a change to a new vessel in the data, put diffHeading to NA - diffHeading[unlist(lapply(as.list(unique(VMS.$VE_REF)),function(x){return(which(VMS.$VE_REF == x)[1])}))[-1]-1] <- NA - VMS.$diffHeading <- 0 - #Store the difference in heading - VMS.$diffHeading[1:(dim(VMS.)[1]-1)] <- diffHeading - #Subset based on the absolute difference in heading - VMS. <- subset(VMS.,diffHeading %in% seq(hd[1],hd[2],1)) -} - -if(is.null(st)==FALSE){ - #Subset based on the speed ranges - if(length(st)>2) stop("More than two speed values selected") - VMS. <- VMS.[which(VMS.$SI_SP >= st[1] & VMS.$SI_SP <= st[2]),] -} -return(VMS.[,-grep("SI_DATIM",colnames(VMS.))])} - +#' Filter out duplicated or out-of-range Tacsat datapoints +#' +#' Filter out duplicated Tacsat datapoints and possibly remove Tacsat +#' datapoints that are outside a certain speed range or heading range. A new +#' Tacsat set, without the out-filtered records is returned. +#' +#' hd is computed as the change in heading from the previous Tacsat point (if +#' ship remains the same). 
Hence, hd does not equal the heading as given in the +#' Tacsat dataset +#' +#' @param tacsat Original VMS data.frame with columns speed, heading, ship and +#' date +#' @param st Speed thresholds in between records are kept. Begin and end +#' threshold or NULL +#' @param hd Heading thresholds in between records are kept. Begin and end +#' threshold or NULL +#' @param remDup Remove duplicates: True or False +#' @author Niels T. Hintzen +#' @seealso \code{\link{sortTacsat}} +#' @references EU lot 2 project, see also: Mills et al. 2006 +#' @examples +#' +#' data(tacsat) +#' +#' #Sort the VMS data +#' tacsat <- sortTacsat(tacsat) +#' tacsat <- tacsat[1:1000,] +#' +#' #Filter the VMS data +#' #A warning is given that duplicated VMS points have been removed +#' tacsat <- filterTacsat(tacsat,c(2,6),hd=NULL,remDup=TRUE) +#' +#' @export filterTacsat +`filterTacsat` <- +function(tacsat + ,st=c(2,6) #Speed threshold points (two values), NULL means use all points + ,hd=NULL #Heading threshold points (two values), NULL means use all points + ,remDup=TRUE #Specify if you want to remove duplicated VMS records (these should not occur in the first place) + ){ + VMS <- tacsat +if(!"SI_DATIM" %in% colnames(VMS)) VMS$SI_DATIM <- as.POSIXct(paste(VMS$SI_DATE, VMS$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") + #Remove duplicate records +if(is.null(remDup)==FALSE){ + uniqueVMS <- which(duplicated(VMS[,c("VE_REF","SI_DATIM")])==FALSE) + VMS. <- VMS[uniqueVMS,] + if(dim(VMS.)[1] != dim(VMS)[1]) warning("duplicate records have been removed") +} else { + VMS. 
<- VMS + } + +if(is.null(hd)==FALSE){ + warning("It is assumed that VMS data is sorted by vessel and date") + #Calculate the difference in heading between succeeding VMS datapoints + diffHeading <- abs(VMS.$SI_HE[2:dim(VMS.)[1]] - VMS.$SI_HE[1:(dim(VMS.)[1]-1)]) + #If there is a change to a new vessel in the data, put diffHeading to NA + diffHeading[unlist(lapply(as.list(unique(VMS.$VE_REF)),function(x){return(which(VMS.$VE_REF == x)[1])}))[-1]-1] <- NA + VMS.$diffHeading <- 0 + #Store the difference in heading + VMS.$diffHeading[1:(dim(VMS.)[1]-1)] <- diffHeading + #Subset based on the absolute difference in heading + VMS. <- subset(VMS.,diffHeading %in% seq(hd[1],hd[2],1)) +} + +if(is.null(st)==FALSE){ + #Subset based on the speed ranges + if(length(st)>2) stop("More than two speed values selected") + VMS. <- VMS.[which(VMS.$SI_SP >= st[1] & VMS.$SI_SP <= st[2]),] +} +return(VMS.[,-grep("SI_DATIM",colnames(VMS.))])} + diff --git a/vmstools/R/findArea.r b/vmstools/R/findArea.r index 2c91cee..f66ba23 100644 --- a/vmstools/R/findArea.r +++ b/vmstools/R/findArea.r @@ -1,32 +1,70 @@ -findArea <- function(grid,threshold=100,diagonal=TRUE){ - - if(class(grid) != "SpatialGridDataFrame") stop(paste("Function not defined for class",class(grid))) - if(!"value" %in% colnames(grid@data)) stop("No 'value' column available in data slot") - grid <- surface(grid) - coords <- coordinates(grid) - - resx <- grid@grid@cellsize[1] - resy <- grid@grid@cellsize[2] - - storeVals <- data.frame(minval=rep(0,nrow(coords)),idxs=rep(NA,nrow(coords))) - for(i in 1:nrow(coords)){ - totSurf <- 0 - pts <- i - while(totSurf <= threshold){ - dists <- do.call(pmin, lapply(as.list(pts), function(x) { - distance(coords[x, 1], coords[x, 2], coords[-pts, 1], coords[-pts,2])})) - if(diagonal==TRUE) idx <- (1:nrow(coords))[-pts][which(dists <= do.call(max, lapply(as.list(pts), function(x) { - max(distance(coords[x, 1], coords[x, 2], coords[x, 1]-resx, coords[x,2]-resy), - distance(coords[x, 1], coords[x, 
2], coords[x, 1]+resx, coords[x,2]+resy))})))] - if(diagonal==FALSE) idx <- (1:nrow(coords))[-pts][which(dists <= do.call(max, lapply(as.list(pts), function(x) { - max(distance(coords[x, 1], coords[x, 2], coords[x, 1]-resx, coords[x,2]), - distance(coords[x, 1], coords[x, 2], coords[x, 1]+resx, coords[x,2]), - distance(coords[x, 1], coords[x, 2], coords[x, 1], coords[x,2]-resy), - distance(coords[x, 1], coords[x, 2], coords[x, 1], coords[x,2]+resy))})))] - pts <- unique(c(pts,idx[which.min(grid@data$value[idx])])) - totSurf <- sum(grid@data$cellArea[pts]) - } - storeVals$minval[i] <- grid@data$value[pts] - storeVals$idxs[i] <- paste(pts,collapse= " ") - } - return(storeVals)} +#' Find area surface smaller than threshold +#' +#' Find gridcells with maximum surface equal or smaller to 'threshold' and +#' return their total value and gridcells involved +#' +#' Diagonal means if area may include x+1,y+1 steps versus only x+1 or y+1 +#' steps +#' +#' @param grid A SpatialGridDataFrame +#' @param threshold Maximum value of surfaces added up +#' @param diagonal Allow diagonal steps in gridcell selection +#' @return data.frame with minimum surface area and gridcells selected +#' @author Niels T. Hintzen +#' @seealso \code{\link{surface}} +#' @examples +#' +#' xrange <- c(0,4) +#' yrange <- c(52,56) +#' resx <- 0.25 +#' resy <- 0.125 +#' +#' #-create grid and assign value column +#' grd <- createGrid(xrange,yrange,resx,resy,type="SpatialGridDataFrame",exactBorder=TRUE) +#' grd@data$value <- runif(nrow(coordinates(grd)),5,10) +#' +#' #- find gridcells with maximum surface equal or smaller to 'threshold' and return their +#' # total value and gridcells involved (diagonal means if area may include x+1,y+1 steps +#' # versus only x+1 or y+1 steps). Note, the larger the threshold, the longer the function +#' # will run! 
+#' res <- findArea(grd,threshold=1000,diagonal=TRUE) +#' +#' #- Plot the result +#' plot(grd,type="p",pch=19,cex=0.5) +#' map.axes() +#' selec <- which.min(res$minval) +#' points(coordinates(grd)[as.numeric(unlist(strsplit(res[selec,"idxs"]," "))),],col=2,lwd=3) +#' +#' @export findArea +findArea <- function(grid,threshold=100,diagonal=TRUE){ + + if(class(grid) != "SpatialGridDataFrame") stop(paste("Function not defined for class",class(grid))) + if(!"value" %in% colnames(grid@data)) stop("No 'value' column available in data slot") + grid <- surface(grid) + coords <- coordinates(grid) + + resx <- grid@grid@cellsize[1] + resy <- grid@grid@cellsize[2] + + storeVals <- data.frame(minval=rep(0,nrow(coords)),idxs=rep(NA,nrow(coords))) + for(i in 1:nrow(coords)){ + totSurf <- 0 + pts <- i + while(totSurf <= threshold){ + dists <- do.call(pmin, lapply(as.list(pts), function(x) { + distance(coords[x, 1], coords[x, 2], coords[-pts, 1], coords[-pts,2])})) + if(diagonal==TRUE) idx <- (1:nrow(coords))[-pts][which(dists <= do.call(max, lapply(as.list(pts), function(x) { + max(distance(coords[x, 1], coords[x, 2], coords[x, 1]-resx, coords[x,2]-resy), + distance(coords[x, 1], coords[x, 2], coords[x, 1]+resx, coords[x,2]+resy))})))] + if(diagonal==FALSE) idx <- (1:nrow(coords))[-pts][which(dists <= do.call(max, lapply(as.list(pts), function(x) { + max(distance(coords[x, 1], coords[x, 2], coords[x, 1]-resx, coords[x,2]), + distance(coords[x, 1], coords[x, 2], coords[x, 1]+resx, coords[x,2]), + distance(coords[x, 1], coords[x, 2], coords[x, 1], coords[x,2]-resy), + distance(coords[x, 1], coords[x, 2], coords[x, 1], coords[x,2]+resy))})))] + pts <- unique(c(pts,idx[which.min(grid@data$value[idx])])) + totSurf <- sum(grid@data$cellArea[pts]) + } + storeVals$minval[i] <- grid@data$value[pts] + storeVals$idxs[i] <- paste(pts,collapse= " ") + } + return(storeVals)} diff --git a/vmstools/R/findEndTacsat.r b/vmstools/R/findEndTacsat.r index 08a1d94..709fda0 100644 --- 
a/vmstools/R/findEndTacsat.r +++ b/vmstools/R/findEndTacsat.r @@ -1,33 +1,75 @@ -`findEndTacsat` <- -function(SI_DATIM - ,startVMS #Starting point of VMS - ,interval #Specify in minutes, NULL means use all points - ,margin #Specify the margin in minutes it might deviate from the interval time, in minutes - ){ - - #Calculate the difference in time between the starting VMS point and its succeeding points - diffTime <- difftime(SI_DATIM[(startVMS+1):length(SI_DATIM)],SI_DATIM[startVMS],units=c("mins")) - if(length(which(diffTime >= (interval-margin) & diffTime <= (interval+margin)))==0){ - warning("No succeeding point found, no interpolation possible") - endVMS <- NA - endDataSet <- 3 - #Check if end of dataset has been reached - ifelse(all((diffTime < (interval-margin))==TRUE),endDataSet <- 1,endDataSet <- 0) - } else { - res <- which(diffTime >= (interval-margin) & diffTime <= (interval+margin)) - if(length(res)>1){ - res2 <- which.min(abs(interval-an(diffTime[res]))) - endVMS <- startVMS + res[res2] - endDataSet <- 0 - } else { - endVMS <- startVMS + res - endDataSet<- 0 - } - } - #Build-in check - if(is.na(endVMS)==FALSE){ - if(!an(difftime(SI_DATIM[endVMS],SI_DATIM[startVMS],units=c("mins"))) %in% seq((interval-margin),(interval+margin),1)) stop("found endVMS point not within interval range") - endVMS <- (endVMS - startVMS) - } -return(c(endVMS,endDataSet))} - +#' Finding the succeeding Tacsat datapoint based on an interval with a +#' specified margin +#' +#' To create an interpolation, two succeeding Tacsat datapoints are needed. +#' This function finds the succeeding Tacsat point and tests if the point is +#' within the specified time interval and margins. As well, if no succeeding +#' datapoint can be found, this information is returned +#' +#' Interval: In most Tacsat datasets the succeeding datapoint can be found 1 or +#' 2 hours apart. This interval time should be specified here. Interval can +#' also be specified as e.g. 
15 minutes if the Tacsat / GPS dataset allows +#' this. Margin: Hardly ever, the interval time is precise. To allow some +#' deviation from the 1 or 2 hour interval the margin can be adjusted. +#' +#' The result returned consists of 2 values. The first value is the index of +#' the Tacsat set specified of the succeeding datapoint. The second value +#' indicates if the dataset has ended. If 1st: NA and 2nd 0 then no succeeding +#' Tacsat point could be found in the specified interval. If 1st: NA and 2nd 1 +#' then no succeeding Tacsat point could be found and end of dataset for a +#' specific vessel has been reached. If 1st: NA and 2nd 2 then no succeeding +#' Tacsat point could be found and end of complete dataset has been reached. If +#' 1st: value then 2nd will be 0, succeeding Tacsat point is found and is +#' specified in 1st value. +#' +#' @param tacsat The Tacsat dataset +#' @param startTacsat Index of Tacsat dataset of startpoint of interpolation +#' @param interval Time in minutes between the succeeding datapoints +#' @param margin Deviation from specified interval to find succeeding +#' datapoints +#' @note This function is called inside interpolateTacsat() +#' @author Niels T. Hintzen +#' @seealso \code{\link{filterTacsat}}, \code{\link{interpolateTacsat}} +#' @references EU lot 2 project +#' @examples +#' +#' data(tacsat) +#' startTacsat <- 2 +#' #result: 3 0 Succeeding point = tacsat[3,] +#' # and end dataset has not been reached yet. 
+#' findEndTacsat(tacsat,startTacsat,interval=120,margin=10) +#' +#' @export findEndTacsat +`findEndTacsat` <- +function(SI_DATIM + ,startVMS #Starting point of VMS + ,interval #Specify in minutes, NULL means use all points + ,margin #Specify the margin in minutes it might deviate from the interval time, in minutes + ){ + + #Calculate the difference in time between the starting VMS point and its succeeding points + diffTime <- difftime(SI_DATIM[(startVMS+1):length(SI_DATIM)],SI_DATIM[startVMS],units=c("mins")) + if(length(which(diffTime >= (interval-margin) & diffTime <= (interval+margin)))==0){ + warning("No succeeding point found, no interpolation possible") + endVMS <- NA + endDataSet <- 3 + #Check if end of dataset has been reached + ifelse(all((diffTime < (interval-margin))==TRUE),endDataSet <- 1,endDataSet <- 0) + } else { + res <- which(diffTime >= (interval-margin) & diffTime <= (interval+margin)) + if(length(res)>1){ + res2 <- which.min(abs(interval-an(diffTime[res]))) + endVMS <- startVMS + res[res2] + endDataSet <- 0 + } else { + endVMS <- startVMS + res + endDataSet<- 0 + } + } + #Build-in check + if(is.na(endVMS)==FALSE){ + if(!an(difftime(SI_DATIM[endVMS],SI_DATIM[startVMS],units=c("mins"))) %in% seq((interval-margin),(interval+margin),1)) stop("found endVMS point not within interval range") + endVMS <- (endVMS - startVMS) + } +return(c(endVMS,endDataSet))} + diff --git a/vmstools/R/formatEflalo.r b/vmstools/R/formatEflalo.r index f9a3a74..1949039 100644 --- a/vmstools/R/formatEflalo.r +++ b/vmstools/R/formatEflalo.r @@ -1,37 +1,54 @@ -formatEflalo <- function(x){ - x$VE_REF <- ac(x$VE_REF) - x$VE_FLT <- ac(x$VE_FLT) - x$VE_COU <- ac(x$VE_COU) - x$VE_LEN <- an(ac(x$VE_LEN)) - x$VE_KW <- an(ac(x$VE_KW)) - if("VE_TON" %in% colnames(x)) x$VE_TON <- an(ac(x$VE_TON)) - x$FT_REF <- ac(x$FT_REF) - x$FT_DCOU <- ac(x$FT_DCOU) - x$FT_DHAR <- ac(x$FT_DHAR) - x$FT_DDAT <- ac(x$FT_DDAT) - x$FT_DTIME <- ac(x$FT_DTIME) - x$FT_LCOU <- ac(x$FT_LCOU) - x$FT_LHAR <- 
ac(x$FT_LHAR) - x$FT_LDAT <- ac(x$FT_LDAT) - x$FT_LTIME <- ac(x$FT_LTIME) - x$LE_ID <- ac(x$LE_ID) - x$LE_CDAT <- ac(x$LE_CDAT) - if("LE_UNIT" %in% colnames(x)) x$LE_UNIT <- ac(x$LE_UNIT) - if("LE_STIME" %in% colnames(x)) x$LE_STIME <- ac(x$LE_STIME) - if("LE_ETIME" %in% colnames(x)) x$LE_ETIME <- ac(x$LE_ETIME) - if("LE_SLAT" %in% colnames(x)) x$LE_SLAT <- an(ac(x$LE_SLAT)) - if("LE_SLON" %in% colnames(x)) x$LE_SLON <- an(ac(x$LE_SLON)) - if("LE_ELAT" %in% colnames(x)) x$LE_ELAT <- an(ac(x$LE_ELAT)) - if("LE_ELON" %in% colnames(x)) x$LE_ELON <- an(ac(x$LE_ELON)) - x$LE_GEAR <- ac(x$LE_GEAR) - x$LE_MSZ <- an(ac(x$LE_MSZ)) - x$LE_RECT <- ac(x$LE_RECT) - x$LE_DIV <- ac(x$LE_DIV) - if("LE_MET" %in% colnames(x)) x$LE_MET <- ac(x$LE_MET) - for(i in c(grep("_KG_",colnames(x)),grep("_EURO_",colnames(x)))) x[,i] <- an(ac(x[,i])) - return(x) -} - - - - +#' Format Eflalo data to ensure that all columns are in right format +#' +#' Reformat all the columns of the Eflalo data to ensure that all data is in +#' the right format +#' +#' +#' @param x eflalo file +#' @return Returns original Eflalo file but now with reformatted data +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{formatTacsat}} +#' @references EU lot 2 project +#' @examples +#' +#' data(eflalo) +#' eflalo <- formatEflalo(eflalo) +#' +#' @export formatEflalo +formatEflalo <- function(x){ + x$VE_REF <- ac(x$VE_REF) + x$VE_FLT <- ac(x$VE_FLT) + x$VE_COU <- ac(x$VE_COU) + x$VE_LEN <- an(ac(x$VE_LEN)) + x$VE_KW <- an(ac(x$VE_KW)) + if("VE_TON" %in% colnames(x)) x$VE_TON <- an(ac(x$VE_TON)) + x$FT_REF <- ac(x$FT_REF) + x$FT_DCOU <- ac(x$FT_DCOU) + x$FT_DHAR <- ac(x$FT_DHAR) + x$FT_DDAT <- ac(x$FT_DDAT) + x$FT_DTIME <- ac(x$FT_DTIME) + x$FT_LCOU <- ac(x$FT_LCOU) + x$FT_LHAR <- ac(x$FT_LHAR) + x$FT_LDAT <- ac(x$FT_LDAT) + x$FT_LTIME <- ac(x$FT_LTIME) + x$LE_ID <- ac(x$LE_ID) + x$LE_CDAT <- ac(x$LE_CDAT) + if("LE_UNIT" %in% colnames(x)) x$LE_UNIT <- ac(x$LE_UNIT) + if("LE_STIME" %in% colnames(x)) x$LE_STIME <- ac(x$LE_STIME) + if("LE_ETIME" %in% colnames(x)) x$LE_ETIME <- ac(x$LE_ETIME) + if("LE_SLAT" %in% colnames(x)) x$LE_SLAT <- an(ac(x$LE_SLAT)) + if("LE_SLON" %in% colnames(x)) x$LE_SLON <- an(ac(x$LE_SLON)) + if("LE_ELAT" %in% colnames(x)) x$LE_ELAT <- an(ac(x$LE_ELAT)) + if("LE_ELON" %in% colnames(x)) x$LE_ELON <- an(ac(x$LE_ELON)) + x$LE_GEAR <- ac(x$LE_GEAR) + x$LE_MSZ <- an(ac(x$LE_MSZ)) + x$LE_RECT <- ac(x$LE_RECT) + x$LE_DIV <- ac(x$LE_DIV) + if("LE_MET" %in% colnames(x)) x$LE_MET <- ac(x$LE_MET) + for(i in c(grep("_KG_",colnames(x)),grep("_EURO_",colnames(x)))) x[,i] <- an(ac(x[,i])) + return(x) +} + + + + diff --git a/vmstools/R/formatTacsat.r b/vmstools/R/formatTacsat.r index ea8d52b..d7b989a 100644 --- a/vmstools/R/formatTacsat.r +++ b/vmstools/R/formatTacsat.r @@ -1,18 +1,35 @@ -formatTacsat <- function(x){ - if("VE_COU" %in% colnames(x)) x$VE_COU <- ac(x$VE_COU) - x$VE_REF <- ac(x$VE_REF) - x$SI_LATI <- anf(x$SI_LATI) - x$SI_LONG <- anf(x$SI_LONG) - x$SI_DATE <- ac(x$SI_DATE) - x$SI_TIME <- ac(x$SI_TIME) - x$SI_SP <- anf(x$SI_SP) - x$SI_HE <- anf(x$SI_HE) - if("SI_HARB" %in% colnames(x)) x$SI_HARB <- ac(x$SI_HARB) - if("SI_STATE" 
%in% colnames(x)) x$SI_STATE <- ac(x$SI_STATE) - if("SI_FT" %in% colnames(x)) x$SI_FT <- ac(x$SI_FT) - #Get rid of NAs in the long and lats - x <- x[!is.na(x$SI_LATI),] - x <- x[!is.na(x$SI_LONG),] - warnings("Those records where longitude and latitude were NA have been removed") - return(x) -} \ No newline at end of file +#' Format tacsat data to ensure that all columns are in right format +#' +#' Reformat all the columns of the tacsat data to ensure that all data is in +#' the right format +#' +#' +#' @param x tacsat file +#' @return Returns original tacsat file but now with reformatted data +#' @author Niels T. Hintzen +#' @seealso \code{\link{formatEflalo}} +#' @references EU lot 2 project +#' @examples +#' +#' data(tacsat) +#' tacsat <- formatTacsat(tacsat) +#' +#' @export formatTacsat +formatTacsat <- function(x){ + if("VE_COU" %in% colnames(x)) x$VE_COU <- ac(x$VE_COU) + x$VE_REF <- ac(x$VE_REF) + x$SI_LATI <- anf(x$SI_LATI) + x$SI_LONG <- anf(x$SI_LONG) + x$SI_DATE <- ac(x$SI_DATE) + x$SI_TIME <- ac(x$SI_TIME) + x$SI_SP <- anf(x$SI_SP) + x$SI_HE <- anf(x$SI_HE) + if("SI_HARB" %in% colnames(x)) x$SI_HARB <- ac(x$SI_HARB) + if("SI_STATE" %in% colnames(x)) x$SI_STATE <- ac(x$SI_STATE) + if("SI_FT" %in% colnames(x)) x$SI_FT <- ac(x$SI_FT) + #Get rid of NAs in the long and lats + x <- x[!is.na(x$SI_LATI),] + x <- x[!is.na(x$SI_LONG),] + warnings("Those records where longitude and latitude were NA have been removed") + return(x) +} diff --git a/vmstools/R/getEflaloMetierLevel7.r b/vmstools/R/getEflaloMetierLevel7.r index 68f14f8..0effdf6 100644 --- a/vmstools/R/getEflaloMetierLevel7.r +++ b/vmstools/R/getEflaloMetierLevel7.r @@ -1,140 +1,214 @@ -getEflaloMetierLevel7=function(dat, analysisName, path, critData="EURO", runHACinSpeciesSelection=TRUE, paramTotal=95, paramLogevent=100, critPca="PCA_70", algoClust="CLARA"){ - - #------------------------------------------------------------------------- - # I. 
GETTING THE DATA IN AND CLEANING FOR MISSING AND NEGATIVE VALUES ETC - #------------------------------------------------------------------------- - - # Load the table linking 3A-CODE (FAO CODE of species) to the species assemblage (level 5). - data(correspLevel7to5) - - # Load the table linking mixed metiers (composed by 2 simple metiers) to their official code of mixed metiers level 5 (FAO - 3 characters). - data(correspMixedMetier) - - timeStart=Sys.time() - - print("--- CREATING DIRECTORIES AND REDUCING THE EFLALO DATASET TO THE ONLY DATA NECESSARY FOR THE ANALYSIS ---") - cat("\n") - - # Creating the directory of the analysis - if (!file.exists(analysisName)) dir.create(analysisName) - setwd(file.path(path,analysisName)) - # Delete old R cache - if (file.exists(".R_Cache")) unlink(".R_Cache",recursive=TRUE) - - eflalo_ori = dat # Keeping this in cached memory for making the final merging at the end - Store(eflalo_ori) - - # ! KEEPING ONLY LE_ID AND THE OUTPUT YOU WANT TO GET (KG/EURO) - dat=dat[,c("LE_ID",grep(critData,names(dat),value=TRUE))] - dat[is.na(dat)]=0 - - # Removing negative and null values - null.value <- vector() - for (i in grep(critData,names(dat))) null.value <- c(null.value,which(dat[,i]<0)) - null.value <- c(null.value,which(apply(dat[,2:ncol(dat)],1,sum,na.rm=TRUE)==0)) - - if(length(null.value)!=0) {LogEvent.removed <- dat[sort(unique(null.value)),] ; dat <- dat[-sort(unique(null.value)),]} - - # Rename species names - names(dat)[-1]=unlist(lapply(strsplit(names(dat[,-1]),"_"),function(x) x[[3]])) - - # Removing miscellaneous species - dat <- dat[,!names(dat)=="MZZ"] - save(dat, file="dat_cleaned.Rdata") - - - #---------------------------------------------------------------------------------------------------------- - # II. 
EXPLORING THE VARIOUS METHODS FOR IDENTIFYING MAIN SPECIES AND KEEPING THEM IN THE DATA SET (STEP 1) - #---------------------------------------------------------------------------------------------------------- - - print("--- EXPLORING THE DATA FOR SELECTION OF MAIN SPECIES ---") - cat("\n") - - # Exploration of main species - explo=selectMainSpecies(dat,analysisName,RunHAC=runHACinSpeciesSelection,DiagFlag=FALSE) - - # Step 1 : selection of main species - Step1=extractTableMainSpecies(dat,explo$namesMainSpeciesHAC,paramTotal=paramTotal,paramLogevent=paramLogevent) - - save(explo,Step1,file="Explo_Step1.Rdata") - - rowNamesSave <- row.names(Step1) - row.names(Step1) <- 1:nrow(Step1) - - #----------------------------- - # III. STEP 2 - PCA - NO-PCA - #----------------------------- - - if (!file.exists(critPca)) dir.create(critPca) - setwd(file.path(path,analysisName,critPca)) - if (file.exists(".R_Cache")) unlink(".R_Cache",recursive=TRUE) - - if (critPca=="PCA_70") Step2=getTableAfterPCA(Step1,analysisName,pcaYesNo="pca",criterion="70percents") else # criterion="70percents" - if (critPca=="PCA_SC") Step2=getTableAfterPCA(Step1,analysisName,pcaYesNo="pca",criterion="screetest") else # criterion="screetest" - if (critPca=="NO_PCA") Step2=getTableAfterPCA(Step1,analysisName,pcaYesNo="nopca",criterion=NULL) - - row.names(Step1) <- rowNamesSave - row.names(Step2) <- rowNamesSave - - save(Step1,file="Step1.Rdata") - save(Step2,file="Step2.Rdata") - - #------------------------------------------------------- - # IV. 
STEP 3 - CLUSTERING METHOD : HAC, CLARA OR KMEANS - #------------------------------------------------------- - - if (!file.exists(algoClust)) dir.create(algoClust) - setwd(file.path(path,analysisName,critPca,algoClust)) - if (file.exists(".R_Cache")) unlink(".R_Cache",recursive=TRUE) - - if (algoClust=="HAC") Step3=getMetierClusters(Step1,Step2,analysisName,methMetier="hac",param1="euclidean",param2="ward") else - if (algoClust=="CLARA") Step3=getMetierClusters(Step1,Step2,analysisName=analysisName,methMetier="clara",param1="euclidean",param2=NULL) else - if (algoClust=="KMEANS") Step3=getMetierClusters(Step1,Step2,analysisName=analysisName,methMetier="kmeans",param1=NULL,param2=NULL) - - save(Step3,file="Step3.Rdata") - - - #------------------------------------------------ - # V. STEP 4 - COMPARISON WITH ORDINATION METHODS - #------------------------------------------------ - - compOrdin="CompOrdin" - if (!file.exists(compOrdin)) dir.create(compOrdin) - setwd(file.path(path,analysisName,critPca,algoClust,compOrdin)) - if (file.exists(".R_Cache")) unlink(".R_Cache",recursive=TRUE) - - if (algoClust=="HAC") clusters=Step3$clusters - if (algoClust=="CLARA") clusters=Step3$clusters$clustering - if (algoClust=="KMEANS") clusters=Step3$clusters$cluster - - compMetiers=compareToOrdination(dat=dat,Step2=Step2,clusters=clusters,targetSpecies=Step3$targetSpecies) - save(compMetiers,file="compMetiers.Rdata") - - - #------------------------------------- - # VI. 
STEP 5 - MERGING BACK TO EFLALO - #------------------------------------- - - # Choosing the final option - setwd(file.path(path,analysisName)) - - #load(paste(path,analysisName,critPca,algoClust,"Step3.Rdata",sep="/")) - - if(!nrow(dat)==nrow(Step3$LE_ID_clust)) print("--error : number of lines in step 3 not equal to input eflalo, please check!!--") - - dat <- cbind(dat,CLUSTER=Step3$LE_ID_clust[,"clust"]) - - # Now reload the full data set - if(length(null.value)==0){ - eflalo_ori[,"CLUSTER"]=Step3$LE_ID_clust[,"clust"] - }else{ - eflalo_ori[-sort(unique(null.value)),"CLUSTER"]=Step3$LE_ID_clust[,"clust"] - } - - cat("\n") - print("Congratulation !! You have now a fully working eflalo dataset with a metier Level 7 !") - cat("\n") - print(Sys.time()-timeStart) - - return(eflalo_ori) -} \ No newline at end of file +#' Finding Metiers from a full EFLALO dataset, general function calling all the +#' various steps of the analyses at once +#' +#' This function will run the various steps of the multivariate analyses in the +#' right order, allowing the full analysis to be run at once. +#' +#' +#' @param dat a data.frame in eflalo format. It should be previously subsetted +#' for the desired region and gear of interest. +#' @param analysisName character, the name of the run. Used for the file name +#' of the plots. +#' @param path character, the path of the main directory where all data and +#' plots will be stored. A number of sub-directories will be created by the +#' function. +#' @param critData character. Choice if the analysis must be performed on +#' eflalo data in weight ("KG") or in value ("EURO") +#' @param runHACinSpeciesSelection Boolean. the HAC can be time consuming for +#' large datasets, and can therefore be switched off for quicker runs. +#' @param paramTotal numeric. The percentage threshold for the Total method. +#' All species ranked in terms of percentage in total catches and cumulating up +#' to this percentage value will be retained. 
Default value is 95\%. +#' @param paramLogevent numeric. The percentage threshold for the Logevent +#' method. All species representing at least this threshold of at least one +#' logevent (one line) will be retained. Default value is 100\% (logevents +#' catching one and only one species). +#' @param critPca character. Choice if a PCA must be run or not (="NO_PCA"), +#' and if yes, which criterion is used for selecting the number of axes, to be +#' chosen between scree test (="PCA_SC") or 70\% of cumulated inertia +#' (="PCA_70") +#' @param algoClust character. Choice of the clustering algorithm, either +#' "CLARA", "HAC" or "KMEANS". +#' @return +#' +#' The function returns the same eflalo data set as was input, with an +#' additional column giving the cluster name where each logevent has been +#' allocated. +#' +#' Beside, a number of sub-directories are created, with intermediate results +#' and plots. +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @seealso \code{\link{selectMainSpecies}}, +#' \code{\link{extractTableMainSpecies}}, \code{\link{getTableAfterPCA}}, +#' \code{\link{getMetierClusters}}, +#' @references Development of tools for logbook and VMS data analysis. 
Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @examples +#' +#' +#' \dontrun{ +#' +#' data(eflalo) +#' +#' eflalo <- formatEflalo(eflalo) +#' +#' eflalo <- eflalo[eflalo$LE_GEAR=="OTB",] +#' +#' # Note that output plots will be sent to getwd() +#' path <- getwd() +#' analysisName <- "metier_analysis_OTB" +#' +#' # Return a fully working eflalo dataset with a metier at DCF Level7 for each +#' # logbook event +#' eflalo_metiers_level7 <- getEflaloMetierLevel7(eflalo, analysisName, path, +#' critData="EURO",runHACinSpeciesSelection=TRUE, paramTotal=95, +#' paramLogevent=100,critPca="PCA_70", algoClust="CLARA") +#' +#' } +#' +#' +#' @export getEflaloMetierLevel7 +getEflaloMetierLevel7=function(dat, analysisName, path, critData="EURO", runHACinSpeciesSelection=TRUE, paramTotal=95, paramLogevent=100, critPca="PCA_70", algoClust="CLARA"){ + + #------------------------------------------------------------------------- + # I. GETTING THE DATA IN AND CLEANING FOR MISSING AND NEGATIVE VALUES ETC + #------------------------------------------------------------------------- + + # Load the table linking 3A-CODE (FAO CODE of species) to the species assemblage (level 5). + data(correspLevel7to5) + + # Load the table linking mixed metiers (composed by 2 simple metiers) to their official code of mixed metiers level 5 (FAO - 3 characters). + data(correspMixedMetier) + + timeStart=Sys.time() + + print("--- CREATING DIRECTORIES AND REDUCING THE EFLALO DATASET TO THE ONLY DATA NECESSARY FOR THE ANALYSIS ---") + cat("\n") + + # Creating the directory of the analysis + if (!file.exists(analysisName)) dir.create(analysisName) + setwd(file.path(path,analysisName)) + # Delete old R cache + if (file.exists(".R_Cache")) unlink(".R_Cache",recursive=TRUE) + + eflalo_ori = dat # Keeping this in cached memory for making the final merging at the end + Store(eflalo_ori) + + # ! 
KEEPING ONLY LE_ID AND THE OUTPUT YOU WANT TO GET (KG/EURO) + dat=dat[,c("LE_ID",grep(critData,names(dat),value=TRUE))] + dat[is.na(dat)]=0 + + # Removing negative and null values + null.value <- vector() + for (i in grep(critData,names(dat))) null.value <- c(null.value,which(dat[,i]<0)) + null.value <- c(null.value,which(apply(dat[,2:ncol(dat)],1,sum,na.rm=TRUE)==0)) + + if(length(null.value)!=0) {LogEvent.removed <- dat[sort(unique(null.value)),] ; dat <- dat[-sort(unique(null.value)),]} + + # Rename species names + names(dat)[-1]=unlist(lapply(strsplit(names(dat[,-1]),"_"),function(x) x[[3]])) + + # Removing miscellaneous species + dat <- dat[,!names(dat)=="MZZ"] + save(dat, file="dat_cleaned.Rdata") + + + #---------------------------------------------------------------------------------------------------------- + # II. EXPLORING THE VARIOUS METHODS FOR IDENTIFYING MAIN SPECIES AND KEEPING THEM IN THE DATA SET (STEP 1) + #---------------------------------------------------------------------------------------------------------- + + print("--- EXPLORING THE DATA FOR SELECTION OF MAIN SPECIES ---") + cat("\n") + + # Exploration of main species + explo=selectMainSpecies(dat,analysisName,RunHAC=runHACinSpeciesSelection,DiagFlag=FALSE) + + # Step 1 : selection of main species + Step1=extractTableMainSpecies(dat,explo$namesMainSpeciesHAC,paramTotal=paramTotal,paramLogevent=paramLogevent) + + save(explo,Step1,file="Explo_Step1.Rdata") + + rowNamesSave <- row.names(Step1) + row.names(Step1) <- 1:nrow(Step1) + + #----------------------------- + # III. 
STEP 2 - PCA - NO-PCA + #----------------------------- + + if (!file.exists(critPca)) dir.create(critPca) + setwd(file.path(path,analysisName,critPca)) + if (file.exists(".R_Cache")) unlink(".R_Cache",recursive=TRUE) + + if (critPca=="PCA_70") Step2=getTableAfterPCA(Step1,analysisName,pcaYesNo="pca",criterion="70percents") else # criterion="70percents" + if (critPca=="PCA_SC") Step2=getTableAfterPCA(Step1,analysisName,pcaYesNo="pca",criterion="screetest") else # criterion="screetest" + if (critPca=="NO_PCA") Step2=getTableAfterPCA(Step1,analysisName,pcaYesNo="nopca",criterion=NULL) + + row.names(Step1) <- rowNamesSave + row.names(Step2) <- rowNamesSave + + save(Step1,file="Step1.Rdata") + save(Step2,file="Step2.Rdata") + + #------------------------------------------------------- + # IV. STEP 3 - CLUSTERING METHOD : HAC, CLARA OR KMEANS + #------------------------------------------------------- + + if (!file.exists(algoClust)) dir.create(algoClust) + setwd(file.path(path,analysisName,critPca,algoClust)) + if (file.exists(".R_Cache")) unlink(".R_Cache",recursive=TRUE) + + if (algoClust=="HAC") Step3=getMetierClusters(Step1,Step2,analysisName,methMetier="hac",param1="euclidean",param2="ward") else + if (algoClust=="CLARA") Step3=getMetierClusters(Step1,Step2,analysisName=analysisName,methMetier="clara",param1="euclidean",param2=NULL) else + if (algoClust=="KMEANS") Step3=getMetierClusters(Step1,Step2,analysisName=analysisName,methMetier="kmeans",param1=NULL,param2=NULL) + + save(Step3,file="Step3.Rdata") + + + #------------------------------------------------ + # V. 
STEP 4 - COMPARISON WITH ORDINATION METHODS + #------------------------------------------------ + + compOrdin="CompOrdin" + if (!file.exists(compOrdin)) dir.create(compOrdin) + setwd(file.path(path,analysisName,critPca,algoClust,compOrdin)) + if (file.exists(".R_Cache")) unlink(".R_Cache",recursive=TRUE) + + if (algoClust=="HAC") clusters=Step3$clusters + if (algoClust=="CLARA") clusters=Step3$clusters$clustering + if (algoClust=="KMEANS") clusters=Step3$clusters$cluster + + compMetiers=compareToOrdination(dat=dat,Step2=Step2,clusters=clusters,targetSpecies=Step3$targetSpecies) + save(compMetiers,file="compMetiers.Rdata") + + + #------------------------------------- + # VI. STEP 5 - MERGING BACK TO EFLALO + #------------------------------------- + + # Choosing the final option + setwd(file.path(path,analysisName)) + + #load(paste(path,analysisName,critPca,algoClust,"Step3.Rdata",sep="/")) + + if(!nrow(dat)==nrow(Step3$LE_ID_clust)) print("--error : number of lines in step 3 not equal to input eflalo, please check!!--") + + dat <- cbind(dat,CLUSTER=Step3$LE_ID_clust[,"clust"]) + + # Now reload the full data set + if(length(null.value)==0){ + eflalo_ori[,"CLUSTER"]=Step3$LE_ID_clust[,"clust"] + }else{ + eflalo_ori[-sort(unique(null.value)),"CLUSTER"]=Step3$LE_ID_clust[,"clust"] + } + + cat("\n") + print("Congratulation !! 
You have now a fully working eflalo dataset with a metier Level 7 !") + cat("\n") + print(Sys.time()-timeStart) + + return(eflalo_ori) +} diff --git a/vmstools/R/getMetierClusters.r b/vmstools/R/getMetierClusters.r index 423487e..ab7f2d3 100644 --- a/vmstools/R/getMetierClusters.r +++ b/vmstools/R/getMetierClusters.r @@ -1,1894 +1,2075 @@ -################################################################################ -# STEP 3 OF THE MULTIVARIATE CLASSIFICATION : # -# RUN THE CLUSTERING OF THE LOGEVENTS # -# 4 METHODS ARE AVALAIBLE : HAC / KMEANS / PAM / CLARA # -################################################################################ - - -getMetierClusters = function(datSpecies,datLog,analysisName="",methMetier="clara",param1="euclidean",param2=NULL){ - - # Load the table linking 3A-CODE (FAO CODE of species) to the species assemblage (level 5). - data(correspLevel7to5) - require(lattice) - - LE_ID=rownames(datSpecies) - nbSpec=ncol(datSpecies) - datSpecies=as.matrix(datSpecies,ncol=nbSpec,nrow=length(LE_ID)) - - print("######## STEP 3 CLUSTERING ########") - - t1=Sys.time() - print(paste(" --- selected method :",methMetier, " ---")) - - -######################################################################################################################################## HAC - - if(methMetier=="hac"){ - - classifWithinVar=numeric() - classifBetweenVar=numeric() - classifQuality=numeric() - sampleList=numeric() - mProfilSample=numeric() - classifVarExplain=numeric() - - nbLog=nrow(datLog) - nbDim=ncol(datLog) - - # Center of gravity of datLog - centerOfGravityDatLog=colMeans(datLog) - - # HAC like CLARA (HAC on sample, affectation of each logevent to a cluster, quality of classification, do it 5 times, choose the sample which gives the best quality of classification) - print("hac on subsets...") - - for(i in 1:5){ - - numSample=i - print(paste("sample",i)) - # Sample of size 10000 logevents or 30% of all logevents - 
minsam=min(nbLog,max(10000,round(nbLog*30/100))) - sam=sample(1:nbLog,size=minsam,replace=FALSE) - # Record the 5 samples - sampleList=rbind(sampleList,sam) - outofsam=setdiff(1:nbLog,sam) - sampleDatLog=datLog[sam,] - sampleDatSpecies=datSpecies[sam,] - - # HAC on the sample - log.hac=hcluster(sampleDatLog, method=param1, link=param2) - inerties.vector=log.hac$height[order(log.hac$height,decreasing=TRUE)] - nbClust=which(scree(inerties.vector)[,"epsilon"]<0)[3] - - # Cut the dendogram at the selected level - sampleClusters=cutree(log.hac,k=nbClust) - - Store(objects()) - gc(reset=TRUE) - - # Add the cluster to each logevent of the sample - sampleDatLogWithClusters=cbind(sampleDatLog,sampleClusters) - - sampleClusters=sampleDatLogWithClusters[,ncol(sampleDatLogWithClusters)] - - # Within and between variance of clusters and classification - centerOfGravityClassif=numeric() - withinVarClusters=numeric() - sizeClusti=numeric() - centerOfGravitySampleDatLog=colMeans(sampleDatLog) - centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravitySampleDatLog) - for(k in 1:nbClust){ # Within variance by cluster - - clusti=sampleDatLogWithClusters[which(sampleClusters==k),1:nbDim] - if(length(which(sampleClusters==k))==1) centerOfGravityClusti=clusti - else centerOfGravityClusti=colMeans(clusti) - centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityClusti) - sizeClusti[k]=length(which(sampleClusters==k)) - if(length(which(sampleClusters==k))==1) withinVarClusters[k]=0 - else withinVarClusters[k]=sum(apply(clusti,1,function(x) withinVar(x,centerOfGravityClusti))) - - } - # Between variance - classifBetweenVar=(1/nbLog)*sum(sizeClusti*((dist(centerOfGravityClassif)[1:nbClust])^2)) - # Within variance of clusters on totale variance (pourcent) and between variance on totale variance of classification - withinVarClusterOnTot=(1/nbLog)*sum(withinVarClusters)/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 - 
betweenVarClassifOnTot=classifBetweenVar/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 - classifVarExplain=c(classifVarExplain,betweenVarClassifOnTot) - - - # Catch profile by cluster for each sample - nbSpec=ncol(datSpecies) - mprofil=numeric() - blank=rep(00,nbSpec) - for(k in 1:nbClust){ - mprofilclusti=colMeans(sampleDatSpecies[which(sampleClusters==k),]) - mprofil=rbind(mprofil,mprofilclusti) - } - mprofil=rbind(mprofil,blank) - - mProfilSample=rbind(mProfilSample,mprofil) - - - # Graphics - - # Calculation of each cluster size - sizeClusters=numeric() - for(k in 1:nbClust){ - sizeClusters[k]=length(which(sampleClusters==k)) - } - - # Compute the test-values for species - resval=test.values(sampleClusters,sampleDatSpecies) - # Determine the target species - target=targetspecies(resval) - - - # Projections on the first factorial plans - png(paste(analysisName,numSample,"Sample_Projections.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(2,3)) - plot(sampleDatLog[,1], sampleDatLog[,2], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(sampleClusters)], main="Projection of HAC classification on the factorial plan 1-2", xlab="axis 1", ylab="axis 2") - if(dim(datLog)[2]>2) { - plot(sampleDatLog[,2], sampleDatLog[,3], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(sampleClusters)], main="Projection of HAC classification on the factorial plan 2-3", xlab="axis 2", ylab="axis 3") - plot(sampleDatLog[,1], sampleDatLog[,3], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(sampleClusters)], main="Projection of HAC classification on the factorial plan 1-3", xlab="axis 1", ylab="axis 3") - if(dim(datLog)[2]>3) { - plot(sampleDatLog[,1], sampleDatLog[,4], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(sampleClusters)], main="Projection of HAC classification on the factorial plan 1-4", xlab="axis 1", ylab="axis 4") - plot(sampleDatLog[,2], sampleDatLog[,4], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(sampleClusters)], 
main="Projection of HAC classification on the factorial plan 2-4", xlab="axis 2", ylab="axis 4") - plot(sampleDatLog[,3], sampleDatLog[,4], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(sampleClusters)], main="Projection of HAC classification on the factorial plan 3-4", xlab="axis 3", ylab="axis 4") - }} - par(op) - dev.off() - - - # Rectangles plotting - png(paste(analysisName,numSample,"Sample_Dendogram.png",sep="_"), width = 1200, height = 800) - plclust(log.hac,labels=FALSE,hang=-1,ann=FALSE) - title(main="HAC dendogram",xlab="Logevents",ylab="Height") - rect.hclust(log.hac, k=nbClust) - dev.off() - - - # Catch profile of the dataset - meanprofile=colMeans(sampleDatSpecies) - png(paste(analysisName,numSample,"Sample_Catch profile of the sample.png",sep="_"), width = 1200, height = 800) - op <- par(las=2) - barplot(meanprofile, main="Catch profile of the sample", xlab="Species", ylab="Percentage of catch") - par(op) - mtext(paste(nrow(datSpecies)," logevents"), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - dev.off() - - - # Catch profile by cluster - nbSpec=ncol(sampleDatSpecies) - summarySampleClusters=array(0,dim=c(6,nbSpec,nbClust)) - dimnames(summarySampleClusters)[[1]]=c("Min.","1st Qu.","Median", "Mean", "3rd Qu.", "Max.") - dimnames(summarySampleClusters)[[2]]=names(meanprofile) - dimnames(summarySampleClusters)[[3]]=paste("Cluster",1:nbClust) - for(k in 1:nbClust){ - if(sizeClusters[k]==1){ - summarySampleClusters[,,k]=apply(t(as.matrix(sampleDatSpecies[which(sampleClusters==k),])),2, - function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) - }else{ - summarySampleClusters[,,k]=apply(sampleDatSpecies[which(sampleClusters==k),],2, - function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) - } - } - # Species names for catch profile 
plots - nameSpPlot=character() - catchMeanThreshold=2 - for(k in 1:nbClust){ - namSpi=names(meanprofile[which(t(summarySampleClusters["Mean",,k])>catchMeanThreshold)]) - numSpi=which(t(summarySampleClusters["Mean",,k])>catchMeanThreshold) - nameSpPloti=rep("",nbSpec) - nameSpPloti[numSpi]=namSpi - nameSpPlot=rbind(nameSpPlot,nameSpPloti) - } - png(paste(analysisName,numSample,"Sample_Mean profile by cluster of the sample.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(k in 1:nbClust){ - op2 <- par(las=2) - barplot(t(summarySampleClusters["Mean",,k]), names.arg=nameSpPlot[k,], xlab="Species", ylab="Percentage of catch", col="gray") - par(op2) - mtext(paste("Cluster",k), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Catch profile by cluster of the sample","\n","\n",sep="")) - dev.off() - - - # Standard deviation profile by cluster - sdprofil=matrix(0,nrow=nbClust,ncol=nbSpec) - namSdPlot=character() - SdThreshold=2 - for(k in 1:nbClust){ - if(length(which(sampleClusters==k))==1){ sdprofilclusti=rep(0,nbSpec) - }else{sdprofilclusti=apply(sampleDatSpecies[which(sampleClusters==k),],2,sd)} - namSDi=names(which(sdprofilclusti>SdThreshold)) - numSDi=which(sdprofilclusti>SdThreshold) - namSdPloti=rep("",nbSpec) - namSdPloti[numSDi]=namSDi - sdprofil[k,]=sdprofilclusti - namSdPlot=rbind(namSdPlot,namSdPloti) - } - rownames(sdprofil) <- 1:nrow(sdprofil) - png(paste(analysisName,numSample,"Sample_Standard deviation profile by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(k in 1:nbClust){ - op2 <- par(las=2) - barplot(sdprofil[k,], names.arg=namSdPlot[k,], xlab="Species", ylab="Percentage of catch") - par(op2) - mtext(paste("Cluster",k), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Standard deviation profile by cluster","\n","\n",sep="")) - 
dev.off() - - - # Number of Logevents by cluster - x=c(1:nbClust) - png(paste(analysisName,numSample,"Sample_Number of Logevents by cluster.png",sep="_"), width = 1200, height = 800) - coord=barplot(sizeClusters, names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents") - barplot(sizeClusters, names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents", col="skyblue") - text(coord,sizeClusters+100,sizeClusters,font=2,xpd=NA) - dev.off() - - - # Target Species profiles (test-value) - targetresval=numeric() - nameTargetPlot=character() - for(k in 1:nbClust){ - nomtargeti=as.character(target$tabnomespcib[k,which(!is.na(target$tabnumespcib[k,]))]) - numtargeti=as.numeric(target$tabnumespcib[k,which(!is.na(target$tabnumespcib[k,]))]) - nameTargetPloti=rep("",nbSpec) - nameTargetPloti[numtargeti]=nomtargeti - nameTargetPlot=rbind(nameTargetPlot,nameTargetPloti) - targetresvalclusti=rep(0,nbSpec) - targetresvalclusti[numtargeti]=resval[nomtargeti,k] - targetresval=rbind(targetresval,targetresvalclusti) - } - - png(paste(analysisName,numSample,"Sample_Profile of target species by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(k in 1:nbClust){ - op2 <- par(las=2) - barplot(targetresval[k,],names.arg=nameTargetPlot[k,], cex.names=1, xlab="Species", ylab="Test-value") - par(op2) - mtext(paste("Cluster",k), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Profile of target species by cluster","\n","\n",sep="")) - dev.off() - - Store(objects()) - gc(reset=TRUE) - - } # end of for(i in 1:5) - - - - - # Select the sample which gives the smaller classification's quality (the best sample) - sam=sampleList[which.max(classifVarExplain),] - outofsam=setdiff(1:nbLog,sam) - sampleDatLog=datLog[sam,] - - nbLogSample=nrow(sampleDatLog) - nbDim=ncol(sampleDatLog) - - - # HAC with the best sample - 
print("Final HAC") - log.hac=hcluster(sampleDatLog, method=param1, link=param2) - - - # Determine the number of cluster thanks to the scree-test - inerties.vector=log.hac$height[order(log.hac$height,decreasing=TRUE)] - nbClust=which(scree(inerties.vector)[,"epsilon"]<0)[3] - - # Cut the dendogram at the selected level - sampleClusters=cutree(log.hac,k=nbClust) - sampleClusters=as.factor(sampleClusters) - - sampleDatLogWithClusters=data.frame() - sampleDatLogWithClusters=cbind(sampleDatLog,sampleClusters) - sampleDatLogWithClusters=as.data.frame(sampleDatLogWithClusters) - - # Discriminante analysis on the learning dataset - learning=lda(sampleClusters~.,data=sampleDatLogWithClusters) - - otherLog=datLog[outofsam,] - otherLog=as.data.frame(otherLog) - - # Predict the cluster for the other logevent - pred=predict(learning,otherLog) - otherDatLogWithClusters=cbind(otherLog,pred$class) - colnames(otherDatLogWithClusters)=colnames(sampleDatLogWithClusters) - - # Rebuilt complete datLog with clusters - clusters=numeric(length=nbLog) - clusters[sam]=sampleClusters - clusters[outofsam]=pred$class - - - # Within and between variance of clusters and classification - centerOfGravityClassif=numeric() - withinVarClusters=numeric() - sizeClusti=numeric() - centerOfGravityDatLog=colMeans(datLog) - centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityDatLog) - for(k in 1:nbClust){ # Within variance by cluster - - clusti=datLog[which(clusters==k),1:nbDim] - if(length(which(clusters==k))==1) centerOfGravityClusti=clusti - else centerOfGravityClusti=colMeans(clusti) - centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityClusti) - sizeClusti[k]=length(which(clusters==k)) - if(length(which(clusters==k))==1) withinVarClusters[k]=0 - else withinVarClusters[k]=sum(apply(clusti,1,function(x) withinVar(x,centerOfGravityClusti))) - - } - # Between variance - classifBetweenVar=(1/nbLog)*sum(sizeClusti*((dist(centerOfGravityClassif)[1:nbClust])^2)) - # Within 
variance of clusters on totale variance (pourcent) and between variance on totale variance of classification - withinVarClusterOnTot=(1/nbLog)*sum(withinVarClusters)/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 - betweenVarClassifOnTot=classifBetweenVar/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 - - - # Calculation of each cluster size - n=nrow(datLog) - sizeClusters=numeric() - for(k in 1:nbClust){ - sizeClusters[k]=length(which(clusters==k)) - } - - # Compute the test-values for species - resval=test.values(clusters,datSpecies) - # Determine the target species - target=targetspecies(resval) - rownames(target$tabnomespcib)=paste("Cluster",1:nbClust) - - - # Compute the percentage of logevents catching each species by cluster - mainSpecies=colnames(datSpecies) - percLogevents=matrix(0,ncol=length(mainSpecies),nrow=nbClust,dimnames=list(paste("Cluster ",1:nbClust,sep=""),mainSpecies)) - for(i in 1:nbClust){ - percLogevents[i,]=round(sapply(mainSpecies,function(x) (sizeClusters[i]-length(which(datSpecies[clusters==i,x]==0)))/sizeClusters[i]*100),digits=1) - } - - - # Projections on the first factorial plans - png(paste(analysisName,"Projections.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(2,3)) - plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="Projection of HAC classification on the factorial plan 1-2", xlab="axis 1", ylab="axis 2") - if(dim(datLog)[2]>2) { - plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="Projection of HAC classification on the factorial plan 2-3", xlab="axis 2", ylab="axis 3") - plot(datLog[,1], datLog[,3], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="Projection of HAC classification on the factorial plan 1-3", xlab="axis 1", ylab="axis 3") - if(dim(datLog)[2]>3) { - plot(datLog[,1], datLog[,4], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="Projection of 
HAC classification on the factorial plan 1-4", xlab="axis 1", ylab="axis 4") - plot(datLog[,2], datLog[,4], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="Projection of HAC classification on the factorial plan 2-4", xlab="axis 2", ylab="axis 4") - plot(datLog[,3], datLog[,4], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="Projection of HAC classification on the factorial plan 3-4", xlab="axis 3", ylab="axis 4") - }} - par(op) - dev.off() - - # For a paper - X11(5,5) - plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="", xlab="Axis 1", ylab="Axis 2") - abline(h=0, lty=2) ; abline(v=0, lty=2) - savePlot(filename=paste(analysisName,'projections_1_2_HAC',sep="_"), type='png') - dev.off() - - X11(5,5) - plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="", xlab="Axis 1", ylab="Axis 2") - abline(h=0, lty=2) ; abline(v=0, lty=2) - savePlot(filename=paste(analysisName,'projections_1_2_HAC',sep="_"), type='png') - dev.off() - - X11(5,5) - plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="", xlab="Axis 2", ylab="Axis 3") - abline(h=0, lty=2) ; abline(v=0, lty=2) - savePlot(filename=paste(analysisName,'projections_2_3_HAC',sep="_"), type='png') - dev.off() - - - # Rectangles plotting - png(paste(analysisName,"Dendogram.png",sep="_"), width = 1200, height = 800) - plclust(log.hac,labels=FALSE,hang=-1,ann=FALSE) - title(main="HAC dendogram",xlab="Logevents",ylab="Height") - rect.hclust(log.hac, k=nbClust) - dev.off() - - - # Catch profile of the dataset - meanprofile=colMeans(datSpecies) - png(paste(analysisName,"Catch profile of the dataset.png",sep="_"), width = 1200, height = 800) - op <- par(las=2) - barplot(meanprofile, main="Catch profile of the dataset", xlab="Species", ylab="Percentage of catch") - par(op) - mtext(paste(nrow(datSpecies)," logevents"), side=3, outer=FALSE, adj=0.5, 
line=0.5, col="darkblue") - dev.off() - - - # Catch profile by cluster - nbSpec=ncol(datSpecies) - summaryClusters=array(0,dim=c(6,nbSpec,nbClust)) - dimnames(summaryClusters)[[1]]=c("Min.","1st Qu.","Median", "Mean", "3rd Qu.", "Max.") - dimnames(summaryClusters)[[2]]=names(meanprofile) - dimnames(summaryClusters)[[3]]=paste("Cluster",1:nbClust) - for(i in 1:nbClust){ - if(sizeClusters[i]==1){ - summaryClusters[,,i]=apply(t(as.matrix(datSpecies[which(clusters==i),])),2, - function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) - }else{ - summaryClusters[,,i]=apply(datSpecies[which(clusters==i),],2, - function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) - } - } - # Species names for catch profile plots - nameSpPlot=character() - catchMeanThreshold=2 - for(i in 1:nbClust){ - namSpi=names(meanprofile[which(t(summaryClusters["Mean",,i])>catchMeanThreshold)]) - numSpi=which(t(summaryClusters["Mean",,i])>catchMeanThreshold) - nameSpPloti=rep("",nbSpec) - nameSpPloti[numSpi]=namSpi - nameSpPlot=rbind(nameSpPlot,nameSpPloti) - } - png(paste(analysisName,"Catch profile by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(i in 1:nbClust){ - op2 <- par(las=2) - barplot(t(summaryClusters["Mean",,i]), names.arg=nameSpPlot[i,], xlab="Species", ylab="Percentage of catch", col="gray") - par(op2) - mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Catch profile by cluster","\n","\n",sep="")) - dev.off() - - - # For a paper : levelplot - X11(4,4) - mat <- t(summaryClusters["Mean",,]) - #rownames(mat) <- c("I","II","III","IV","V","VI","VII","VIII","IX","X","XI","XII","XIII","XIV","XV", - # "XVI","XVII","XVIII","XIX","XX")[1:nrow(mat)] - 
rownames(mat) <- c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20")[1:nrow(mat)] - sp <- apply(mat,2, function(x) length(which(x>20))) - sp2=sp[which(sp>=1)] - #colnames(mat)[sp==0] <- "" - mat2=mat[,which(colnames(mat)%in%names(sp2))] - cc <- colorRampPalette(c("white", "black"),space = "rgb", interpolate="spline") - print(levelplot(mat2, cuts=4, aspect=1, xlab="", ylab="", col.regions=cc(5), at=c(0,20,40,60,80,100), scales=list(cex=0.7), colorkey=list(space="right", at=c(0,20,40,60,80,100), width=1.1))) - savePlot(filename=paste(analysisName,'mean_profile_by_cluster_levelplot',sep="_"), type='png') - dev.off() - - - # Standard deviation profile by cluster - sdprofil=matrix(0,nrow=nbClust,ncol=nbSpec) - namSdPlot=character() - SdThreshold=2 - for(i in 1:nbClust){ - if(length(which(clusters==i))==1){ sdprofilclusti=rep(0,nbSpec) - }else{sdprofilclusti=apply(datSpecies[which(clusters==i),],2,sd)} - namSDi=names(which(sdprofilclusti>SdThreshold)) - numSDi=which(sdprofilclusti>SdThreshold) - namSdPloti=rep("",nbSpec) - namSdPloti[numSDi]=namSDi - sdprofil[i,]=sdprofilclusti - namSdPlot=rbind(namSdPlot,namSdPloti) - } - rownames(sdprofil) <- 1:nrow(sdprofil) - png(paste(analysisName,"Standard deviation profile by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(i in 1:nbClust){ - op2 <- par(las=2) - barplot(sdprofil[i,], names.arg=namSdPlot[i,], xlab="Species", ylab="Percentage of catch") - par(op2) - mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Standard deviation profile by cluster","\n","\n",sep="")) - dev.off() - - - # Number of Logevents by cluster - x=c(1:nbClust) - png(paste(analysisName,"Number of Logevents by cluster.png",sep="_"), width = 1200, height = 800) - coord=barplot(sizeClusters, names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of 
Logevents") - barplot(sizeClusters, names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents", col="skyblue") - text(coord,sizeClusters+400,sizeClusters,font=2,xpd=NA) - dev.off() - - - # Profile of test-values by cluster - targetresval=numeric() - nameTargetPlot=character() - for(i in 1:nbClust){ - nomtargeti=as.character(target$tabnomespcib[i,which(!is.na(target$tabnumespcib[i,]))]) - numtargeti=as.numeric(target$tabnumespcib[i,which(!is.na(target$tabnumespcib[i,]))]) - nameTargetPloti=rep("",nbSpec) - nameTargetPloti[numtargeti]=nomtargeti - nameTargetPlot=rbind(nameTargetPlot,nameTargetPloti) - targetresvalclusti=rep(0,nbSpec) - targetresvalclusti[numtargeti]=resval[nomtargeti,i] - targetresval=rbind(targetresval,targetresvalclusti) - } - - png(paste(analysisName,"Profile of test-values by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(i in 1:nbClust){ - op2 <- par(las=2) - barplot(targetresval[i,],names.arg=nameTargetPlot[i,], cex.names=1, xlab="Species", ylab="Test-value") - par(op2) - mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Profile of test-values by cluster","\n","\n",sep="")) - dev.off() - - - - - # Descriptive tables of the clusters - clusterDesc=matrix(0,nrow=9,ncol=nbClust) - for(i in 1:nbClust){ - clusterDesc[,i]=c(i, - length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<50))+1, - length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<90))+1, - length(which(t(summaryClusters["Median",,i])>50)), - length(which(resval[,i]>1.96)), - length(which(resval[,i]>3.29)), - length(which(apply(datSpecies,2,function (x) (sizeClusters[i]-length(which(x[clusters==i]==0)))/sizeClusters[i]*100)>50)), - length(which(apply(datSpecies,2,function (x) 
(sizeClusters[i]-length(which(x[clusters==i]==0)))/sizeClusters[i]*100)>90)), - sizeClusters[i]) - } - rownames(clusterDesc)=c("Number of species", - "to have 50% of catch", "to have 90% of catch", - "with a median higher than 50", - "with a test-value > 1.96", "with a test-value > 3.29", - "catch in 50% of the logevents", "catch in 90% of the logevents", - "Clusters size") - colnames(clusterDesc)=1:nbClust - clusterDesc2=as.data.frame(clusterDesc) - - - # Summary tables of the clusters - namesSpecies=matrix(NA,nrow=nbClust,ncol=10) - namesCapt=matrix(NA,nrow=nbClust,ncol=5) - nbSpeciesCatch = min(5,dim(t(summaryClusters["Mean",,]))[[2]]) - namesTarget=matrix(NA,nrow=nbClust,ncol=5) - nbSpeciesVT = min(5,dim(target$tabnomespcib)[[2]]) - tabLibname=matrix(NA,nrow=nbClust,ncol=10) - listLibname=list() - - for(i in 1:nbClust){ - namesCapt[i,]=colnames(t(summaryClusters["Mean",,i]))[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)][1:nbSpeciesCatch] - a=as.data.frame(t(summaryClusters["Mean",target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])],i])) - colnames(a)= target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])] - if(length(a)!=0){ - namesTarget[i,1:length(target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])])]=colnames(a[order(a,decreasing=TRUE)]) - } - namesSpecies[i,1:length(union(namesCapt[i,],namesTarget[i,]))]=union(namesCapt[i,],namesTarget[i,]) - } - - for(i in 1:nbClust){ - listLibname[[i]]=lapply(as.list(namesSpecies[i,]), function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0) "NA" - else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"French_name"]) - tabLibname[i,]=unlist(lapply(listLibname[[i]], function(x) as.character(unlist(x)))) - } - - tabPropCatch=matrix(NA,nrow=nbClust,ncol=10) - tabTestVal=matrix(NA,nrow=nbClust,ncol=10) - tabPropLog=matrix(NA,nrow=nbClust,ncol=10) - - for(i in 1:nbClust){ - 
print("-----------------------------------------------------------------") - print(paste("Cluster",i)) - propCatch=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] - tabPropCatch[i,1:length(propCatch)]=propCatch - print(propCatch) - testVal=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) resval[x,i]),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] - tabTestVal[i,1:length(testVal)]=testVal - print(testVal) - propLog=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (sizeClusters[i]-length(which(datSpecies[clusters==i,x]==0)))/sizeClusters[i]*100),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (sizeClusters[i]-length(which(datSpecies[clusters==i,x]==0)))/sizeClusters[i]*100),digits=1)>=0.1)] - tabPropLog[i,1:length(propLog)]=propLog - print(propLog) - } - - tabClusters=array(0,dim=c(10,5,nbClust)) - dimnames(tabClusters)[[2]]=c("Libname","FAO","Test-value","% Catch","% Logevents") - dimnames(tabClusters)[[3]]=paste("Cluster",1:nbClust) - for(i in 1:nbClust){ - tabClusters[,,i]=cbind(tabLibname[i,],namesSpecies[i,],tabTestVal[i,],tabPropCatch[i,],tabPropLog[i,]) - } - - sizeTabClusters=numeric() - for(i in 1:nbClust){ - sizeTabClusters[i]=min(length(namesSpecies[i,!is.na(namesSpecies[i,])]),length(tabPropCatch[i,!is.na(tabPropCatch[i,])]),length(tabTestVal[i,!is.na(tabTestVal[i,])]),length(tabPropLog[i,!is.na(tabPropLog[i,])])) - } - - - # Target Species - # Intersection of species from tabClusters having : - % Cumulated Catch > thresholdCatch - # - Test-value > thresholdTestValue - # - % Logevents > thresholdLogevents - thresholdCatch=75 - thresholdTestValue=3 - thresholdLogevents=30 - - sppCumCatch=list() - sppTestValue=list() - 
sppLogevents=list() - targetSpeciesByCluster=list() - - for (i in 1:nbClust){ - percCatchCum=cumsum(as.numeric(tabClusters[,"% Catch",i])) - nbSpSel=length(which(percCatchCum<thresholdCatch))+1 - sppCumCatch[[i]]=tabClusters[1:nbSpSel,"FAO",i] - - sppTestValue[[i]]=tabClusters[which(as.numeric(tabClusters[,"Test-value",i])>thresholdTestValue),"FAO",i] - - sppLogevents[[i]]=tabClusters[which(as.numeric(tabClusters[,"% Logevents",i])>thresholdLogevents),"FAO",i] - - targetSpeciesByCluster[[i]]=intersect(sppCumCatch[[i]],sppTestValue[[i]]) - targetSpeciesByCluster[[i]]=intersect(targetSpeciesByCluster[[i]],sppLogevents[[i]]) - } - - # List of metiers (level 7) - listMetiersL7=list() - for (i in 1:nbClust){ - metiersClusteri=targetSpeciesByCluster[[i]] - metiersClusteri=as.character(unique(unlist(metiersClusteri))) - metiersClusteri=paste(unlist(strsplit(metiersClusteri," ")),collapse=" ") - listMetiersL7[[i]]=metiersClusteri - } - - # Metier (level 7) of each logevent - metierByLogeventL7=unlist(sapply(clusters,function(x) listMetiersL7[[x]])) - - - - # Create csv tables - write.table(clusterDesc2,file="descClusters.csv",col.names=FALSE,sep=";") - - dfClust=data.frame() - dfClust=paste("Clust ",1:nbClust,sep="") - for(i in 1:nbClust){ - write.table(dfClust[i],file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") - tabClusti=as.data.frame(tabClusters[1:sizeTabClusters[i],,i]) - write.table(tabClusti,file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") - } - - - LE_ID_clust=data.frame(LE_ID=LE_ID,clust=metierByLogeventL7) - print(" --- end of step 3 ---") - print(Sys.time()-t1) - - return(list(LE_ID_clust=LE_ID_clust, clusters=clusters, sizeClusters=sizeClusters, - betweenVarClassifOnTot=betweenVarClassifOnTot, mProfilSample=mProfilSample, - nbClust=nbClust, summaryClusters=summaryClusters, testValues=resval, - testValuesSpecies=target$tabnomespcib, percLogevents=percLogevents, - descClusters=clusterDesc2, tabClusters=tabClusters, - targetSpecies=targetSpeciesByCluster)) - - } else - - - - - 
-######################################################################################################################################## KMEANS - - if(methMetier=="kmeans"){ - # Calculation of optimal k thanks to within variance - nbLog=nrow(datLog) - - varintra=numeric() - for (k in 2:15){ - clustersKmeans=kmeans(datLog, k, iter.max=60, nstart=10) - varintra[k]=1/nbLog*sum(clustersKmeans$withinss) - } - nbClust=which(scree(varintra)[,"epsilon"]<0)[3] - - - png(paste(analysisName,"Within variance of the classification.png",sep="_"), width = 1200, height = 800) - plot(varintra,main="Within clusters variance",xlab="Number of clusters",ylab="Within Variance") - dev.off() - - Store(objects()) - gc(reset=TRUE) - - # KMEANS with k optimal - clusters=kmeans(datLog, nbClust, iter.max=60, nstart=10, algorithm="Hartigan-Wong") - - - # Within and between variance of clusters and classification - centerOfGravityClassif=numeric() - withinVarClusters=numeric() - sizeClusti=numeric() - centerOfGravityDatLog=colMeans(datLog) - centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityDatLog) - for(k in 1:nbClust){ # Within variance by cluster - - clusti=datLog[which(clusters$cluster==k),] - if(length(which(clusters$cluster==k))==1) centerOfGravityClusti=clusti - else centerOfGravityClusti=colMeans(clusti) - centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityClusti) - sizeClusti[k]=length(which(clusters$cluster==k)) - if(length(which(clusters$cluster==k))==1) withinVarClusters[k]=0 - else withinVarClusters[k]=sum(apply(clusti,1,function(x) withinVar(x,centerOfGravityClusti))) - - } - # Between variance - classifBetweenVar=(1/nbLog)*sum(sizeClusti*((dist(centerOfGravityClassif)[1:nbClust])^2)) - # Within variance of clusters on totale variance (pourcent) and between variance on totale variance of classification - withinVarClusterOnTot=(1/nbLog)*sum(withinVarClusters)/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 - 
betweenVarClassifOnTot=classifBetweenVar/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 - - - # Compute the test-values for species - resval=test.values(clusters$cluster,datSpecies) - # Determine the target species - target=targetspecies(resval) - rownames(target$tabnomespcib)=paste("Cluster",1:nbClust) - - - # Compute the percentage of logevents catching each species by cluster - mainSpecies=colnames(datSpecies) - percLogevents=matrix(0,ncol=length(mainSpecies),nrow=nbClust,dimnames=list(paste("Cluster ",1:nbClust,sep=""),mainSpecies)) - for(i in 1:nbClust){ - percLogevents[i,]=round(sapply(mainSpecies,function(x) (clusters$size[i]-length(which(datSpecies[clusters$cluster==i,x]==0)))/clusters$size[i]*100),digits=1) - } - - - # Projections on the first factorial plans - png(paste(analysisName,"Projections.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(2,3)) - plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="Projection of Kmeans classification on the factorial plan 1-2", xlab="axis 1", ylab="axis 2") - if(dim(datLog)[2]>2) { - plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="Projection of Kmeans classification on the factorial plan 2-3", xlab="axis 2", ylab="axis 3") - plot(datLog[,1], datLog[,3], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="Projection of Kmeans classification on the factorial plan 1-3", xlab="axis 1", ylab="axis 3") - if(dim(datLog)[2]>3) { - plot(datLog[,1], datLog[,4], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="Projection of Kmeans classification on the factorial plan 1-4", xlab="axis 1", ylab="axis 4") - plot(datLog[,2], datLog[,4], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="Projection of Kmeans classification on the factorial plan 2-4", xlab="axis 2", ylab="axis 4") - plot(datLog[,3], datLog[,4], 
pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="Projection of Kmeans classification on the factorial plan 3-4", xlab="axis 3", ylab="axis 4") - }} - par(op) - dev.off() - - # For a paper - X11(5,5) - plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="", xlab="Axis 1", ylab="Axis 2") - abline(h=0, lty=2) ; abline(v=0, lty=2) - savePlot(filename=paste(analysisName,'projections_1_2_Kmeans',sep="_"), type='png') - dev.off() - - X11(5,5) - plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="", xlab="Axis 1", ylab="Axis 2") - abline(h=0, lty=2) ; abline(v=0, lty=2) - savePlot(filename=paste(analysisName,'projections_1_2_Kmeans',sep="_"), type='png') - dev.off() - - X11(5,5) - plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="", xlab="Axis 2", ylab="Axis 3") - abline(h=0, lty=2) ; abline(v=0, lty=2) - savePlot(filename=paste(analysisName,'projections_2_3_Kmeans',sep="_"), type='png') - dev.off() - - - # Catch profile of the dataset - meanprofile=colMeans(datSpecies) - png(paste(analysisName,"Catch profile of the dataset.png",sep="_"), width = 1200, height = 800) - op <- par(las=2) - barplot(meanprofile, main="Catch profile of the dataset", xlab="Species", ylab="Percentage of catch") - par(op) - mtext(paste(nrow(datSpecies)," logevents"), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - dev.off() - - - # Catch profile by cluster - nbSpec=ncol(datSpecies) - summaryClusters=array(0,dim=c(6,nbSpec,nbClust)) - dimnames(summaryClusters)[[1]]=c("Min.","1st Qu.","Median", "Mean", "3rd Qu.", "Max.") - dimnames(summaryClusters)[[2]]=names(meanprofile) - dimnames(summaryClusters)[[3]]=paste("Cluster",1:nbClust) - for(i in 1:nbClust){ - if(clusters$size[i]==1){ - summaryClusters[,,i]=apply(t(as.matrix(datSpecies[which(clusters$cluster==i),])),2, - function(x) 
rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) - }else{ - summaryClusters[,,i]=apply(datSpecies[which(clusters$cluster==i),],2, - function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) - } - } - # Species names for catch profile plots - nameSpPlot=character() - catchMeanThreshold=2 - for(i in 1:nbClust){ - namSpi=names(meanprofile[which(t(summaryClusters["Mean",,i])>catchMeanThreshold)]) - numSpi=which(t(summaryClusters["Mean",,i])>catchMeanThreshold) - nameSpPloti=rep("",nbSpec) - nameSpPloti[numSpi]=namSpi - nameSpPlot=rbind(nameSpPlot,nameSpPloti) - } - png(paste(analysisName,"Catch profile by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(i in 1:nbClust){ - op2 <- par(las=2) - barplot(t(summaryClusters["Mean",,i]), names.arg=nameSpPlot[i,], xlab="Species", ylab="Percentage of catch", col="gray") - par(op2) - mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Catch profile by cluster","\n","\n",sep="")) - dev.off() - - - # For a paper : levelplot - X11(4,4) - mat <- t(summaryClusters["Mean",,]) - #rownames(mat) <- c("I","II","III","IV","V","VI","VII","VIII","IX","X","XI","XII","XIII","XIV","XV", - # "XVI","XVII","XVIII","XIX","XX")[1:nrow(mat)] - rownames(mat) <- c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20")[1:nrow(mat)] - sp <- apply(mat,2, function(x) length(which(x>20))) - sp2=sp[which(sp>=1)] - #colnames(mat)[sp==0] <- "" - mat2=mat[,which(colnames(mat)%in%names(sp2))] - cc <- colorRampPalette(c("white", "black"),space = "rgb", interpolate="spline") - print(levelplot(mat2, cuts=4, aspect=1, xlab="", ylab="", col.regions=cc(5), at=c(0,20,40,60,80,100), scales=list(cex=0.7), 
colorkey=list(space="right", at=c(0,20,40,60,80,100), width=1.1))) - savePlot(filename=paste(analysisName,'mean_profile_by_cluster_levelplot',sep="_"), type='png') - dev.off() - - - # Standard deviation profile by cluster - sdprofil=matrix(0,nrow=nbClust,ncol=nbSpec) - namSdPlot=character() - SdThreshold=2 - for(i in 1:nbClust){ - if(length(which(clusters$cluster==i))==1){ sdprofilclusti=rep(0,nbSpec) - }else{sdprofilclusti=apply(datSpecies[which(clusters$cluster==i),],2,sd)} - namSDi=names(which(sdprofilclusti>SdThreshold)) - numSDi=which(sdprofilclusti>SdThreshold) - namSdPloti=rep("",nbSpec) - namSdPloti[numSDi]=namSDi - sdprofil[i,]=sdprofilclusti - namSdPlot=rbind(namSdPlot,namSdPloti) - } - rownames(sdprofil) <- 1:nrow(sdprofil) - png(paste(analysisName,"Standard deviation profile by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(i in 1:nbClust){ - op2 <- par(las=2) - barplot(sdprofil[i,], names.arg=namSdPlot[i,], xlab="Species", ylab="Percentage of catch") - par(op2) - mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Standard deviation profile by cluster","\n","\n",sep="")) - dev.off() - - - # Number of Logevents by cluster - x=c(1:nbClust) - png(paste(analysisName,"Number of Logevents by cluster.png",sep="_"), width = 1200, height = 800) - coord=barplot(clusters$size, names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents") - barplot(clusters$size, names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents", col="skyblue") - text(coord,clusters$size+400,clusters$size,font=2,xpd=NA) - dev.off() - - - # Profile of test-values by cluster - targetresval=numeric() - nameTargetPlot=character() - for(i in 1:nbClust){ - nomtargeti=as.character(target$tabnomespcib[i,which(!is.na(target$tabnumespcib[i,]))]) - 
numtargeti=as.numeric(target$tabnumespcib[i,which(!is.na(target$tabnumespcib[i,]))]) - nameTargetPloti=rep("",nbSpec) - nameTargetPloti[numtargeti]=nomtargeti - nameTargetPlot=rbind(nameTargetPlot,nameTargetPloti) - targetresvalclusti=rep(0,nbSpec) - targetresvalclusti[numtargeti]=resval[nomtargeti,i] - targetresval=rbind(targetresval,targetresvalclusti) - } - - png(paste(analysisName,"Profile of test-values by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(i in 1:nbClust){ - op2 <- par(las=2) - barplot(targetresval[i,], cex.names=1, names.arg=nameTargetPlot[i,], xlab="Species", ylab="Test-value") - par(op2) - mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Profile of test-values by cluster","\n","\n",sep="")) - dev.off() - - - - - # Descriptive tables of the clusters - clusterDesc=matrix(0,nrow=9,ncol=nbClust) - for(i in 1:nbClust){ - clusterDesc[,i]=c(i, - length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<50))+1, - length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<90))+1, - length(which(t(summaryClusters["Median",,i])>50)), - length(which(resval[,i]>1.96)), - length(which(resval[,i]>3.29)), - length(which(apply(datSpecies,2,function (x) (clusters$size[i]-length(which(x[clusters$cluster==i]==0)))/clusters$size[i]*100)>50)), - length(which(apply(datSpecies,2,function (x) (clusters$size[i]-length(which(x[clusters$cluster==i]==0)))/clusters$size[i]*100)>90)), - clusters$size[i]) - } - rownames(clusterDesc)=c("Number of species", - "to have 50% of catch", "to have 90% of catch", - "with a median higher than 50", - "with a test-value > 1.96", "with a test-value > 3.29", - "catch in 50% of the logevents", "catch in 90% of the logevents", - "Clusters size") - colnames(clusterDesc)=1:nbClust - 
clusterDesc2=as.data.frame(clusterDesc) - - - # Summary tables of the clusters - namesSpecies=matrix(NA,nrow=nbClust,ncol=10) - namesCapt=matrix(NA,nrow=nbClust,ncol=5) - nbSpeciesCatch = min(5,dim(t(summaryClusters["Mean",,]))[[2]]) - namesTarget=matrix(NA,nrow=nbClust,ncol=5) - nbSpeciesVT = min(5,dim(target$tabnomespcib)[[2]]) - tabLibname=matrix(NA,nrow=nbClust,ncol=10) - listLibname=list() - - for(i in 1:nbClust){ - namesCapt[i,]=colnames(t(summaryClusters["Mean",,i]))[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)][1:nbSpeciesCatch] - a=as.data.frame(t(summaryClusters["Mean",target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])],i])) - colnames(a)= target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])] - if(length(a)!=0){ - namesTarget[i,1:length(target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])])]=colnames(a[order(a,decreasing=TRUE)]) - } - namesSpecies[i,1:length(union(namesCapt[i,],namesTarget[i,]))]=union(namesCapt[i,],namesTarget[i,]) - } - - for(i in 1:nbClust){ - listLibname[[i]]=lapply(as.list(namesSpecies[i,]), function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0) "NA" - else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"French_name"]) - tabLibname[i,]=unlist(lapply(listLibname[[i]], function(x) as.character(unlist(x)))) - } - - tabPropCatch=matrix(NA,nrow=nbClust,ncol=10) - tabTestVal=matrix(NA,nrow=nbClust,ncol=10) - tabPropLog=matrix(NA,nrow=nbClust,ncol=10) - - for(i in 1:nbClust){ - print("-----------------------------------------------------------------") - print(paste("Cluster",i)) - propCatch=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] - tabPropCatch[i,1:length(propCatch)]=propCatch - print(propCatch) - 
testVal=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) resval[x,i]),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] - tabTestVal[i,1:length(testVal)]=testVal - print(testVal) - propLog=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (clusters$size[i]-length(which(datSpecies[clusters$cluster==i,x]==0)))/clusters$size[i]*100),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (clusters$size[i]-length(which(datSpecies[clusters$cluster==i,x]==0)))/clusters$size[i]*100),digits=1)>=0.1)] - tabPropLog[i,1:length(propLog)]=propLog - print(propLog) - } - - tabClusters=array(0,dim=c(10,5,nbClust)) - dimnames(tabClusters)[[2]]=c("Libname","FAO","Test-value","% Catch","% Logevents") - dimnames(tabClusters)[[3]]=paste("Cluster",1:nbClust) - for(i in 1:nbClust){ - tabClusters[,,i]=cbind(tabLibname[i,],namesSpecies[i,],tabTestVal[i,],tabPropCatch[i,],tabPropLog[i,]) - } - - sizeTabClusters=numeric() - for(i in 1:nbClust){ - sizeTabClusters[i]=min(length(namesSpecies[i,!is.na(namesSpecies[i,])]),length(tabPropCatch[i,!is.na(tabPropCatch[i,])]),length(tabTestVal[i,!is.na(tabTestVal[i,])]),length(tabPropLog[i,!is.na(tabPropLog[i,])])) - } - - - # Target Species - # Intersection of species from tabClusters having : - % Cumulated Catch > thresholdCatch - # - Test-value > thresholdTestValue - # - % Logevents > thresholdLogevents - thresholdCatch=75 - thresholdTestValue=3 - thresholdLogevents=30 - - sppCumCatch=list() - sppTestValue=list() - sppLogevents=list() - targetSpeciesByCluster=list() - - for (i in 1:nbClust){ - percCatchCum=cumsum(as.numeric(tabClusters[,"% Catch",i])) - nbSpSel=length(which(percCatchCum<thresholdCatch))+1 - sppCumCatch[[i]]=tabClusters[1:nbSpSel,"FAO",i] - - sppTestValue[[i]]=tabClusters[which(as.numeric(tabClusters[,"Test-value",i])>thresholdTestValue),"FAO",i] - - sppLogevents[[i]]=tabClusters[which(as.numeric(tabClusters[,"% Logevents",i])>thresholdLogevents),"FAO",i] - - targetSpeciesByCluster[[i]]=intersect(sppCumCatch[[i]],sppTestValue[[i]]) - 
targetSpeciesByCluster[[i]]=intersect(targetSpeciesByCluster[[i]],sppLogevents[[i]]) - } - - # List of metiers (level 7) - listMetiersL7=list() - for (i in 1:nbClust){ - metiersClusteri=targetSpeciesByCluster[[i]] - metiersClusteri=as.character(unique(unlist(metiersClusteri))) - metiersClusteri=paste(unlist(strsplit(metiersClusteri," ")),collapse=" ") - listMetiersL7[[i]]=metiersClusteri - } - - # Metier (level 7) of each logevent - metierByLogeventL7=unlist(sapply(clusters$cluster,function(x) listMetiersL7[[x]])) - - - - # Create csv tables - write.table(clusterDesc2,file="descClusters.csv",col.names=FALSE,sep=";") - - dfClust=data.frame() - dfClust=paste("Clust ",1:nbClust,sep="") - for(i in 1:nbClust){ - write.table(dfClust[i],file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") - tabClusti=as.data.frame(tabClusters[1:sizeTabClusters[i],,i]) - write.table(tabClusti,file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") - } - - - LE_ID_clust=data.frame(LE_ID=LE_ID,clust=metierByLogeventL7) - print(" --- end of step 3 ---") - print(Sys.time()-t1) - - return(list(LE_ID_clust=LE_ID_clust, clusters=clusters, - betweenVarClassifOnTot=betweenVarClassifOnTot, nbClust=nbClust, - summaryClusters=summaryClusters, testValues=resval, - testValuesSpecies=target$tabnomespcib, percLogevents=percLogevents, - descClusters=clusterDesc2, tabClusters=tabClusters, - targetSpecies=targetSpeciesByCluster)) - - } else - - - - - -######################################################################################################################################## PAM - - if(methMetier=="pam"){ - - # Calculation of optimal k thanks to the silhouette (second maximum) - nbLog=nrow(datLog) - - clustersPam.silcoeff=numeric() - clustersPam.silcoeff[1]=0 - clustersPam.silcoeff[2]=0 - clustersPam.silcoeff[3]=0 - - k=2 - compMax=1 - repeat{ - k=k+2 - print(k) - clustersPam=pam(datLog,k) - clustersPam.silcoeff[k]=clustersPam$silinfo$avg.width - clustersPam=pam(datLog,k+1) - 
clustersPam.silcoeff[k+1]=clustersPam$silinfo$avg.width - if((clustersPam.silcoeff[k-2]<clustersPam.silcoeff[k-1]) & (clustersPam.silcoeff[k-1]>clustersPam.silcoeff[k]) & compMax<=2){ - if(compMax==2){ - nbClust=k-1 - print(paste("2e max =",k-1)) - print(paste("nbClust =",nbClust)) - break - } else { - compMax=compMax+1 - print(paste("compMax1 =",compMax)) - print(paste("1er max =",k-1)) - } - } - if((clustersPam.silcoeff[k-1]<clustersPam.silcoeff[k]) & (clustersPam.silcoeff[k]>clustersPam.silcoeff[k+1]) & compMax<=2){ - if(compMax==2){ - nbClust=k - print(paste("2e max =",k)) - print(paste("nbClust =",nbClust)) - break - } else { - compMax=compMax+1 - print(paste("compMax2 =",compMax)) - print(paste("1er max =",k)) - } - } - Store(objects()) - gc(reset=TRUE) - } - - png(paste(analysisName,"Silhouette of the classification.png",sep="_"), width = 1200, height = 800) - plot(clustersPam.silcoeff, main="Silhouette of the classification", xlab="Number of clusters", ylab="Silhouette") # k optimal corresponds to second maximum of silhouette's coefficients - dev.off() - - Store(objects()) - gc(reset=TRUE) - - cat("PamSilCoeff",clustersPam.silcoeff,"\n") - - # PAM with optimal k - clusters=pam(datLog,nbClust) - summary(clusters) - - - # Within and between variance of clusters and classification - centerOfGravityClassif=numeric() - withinVarClusters=numeric() - sizeClusti=numeric() - centerOfGravityDatLog=colMeans(datLog) - centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityDatLog) - for(k in 1:nbClust){ # Within variance by cluster - - clusti=datLog[which(clusters$clustering==k),] - if(length(which(clusters$clustering==k))==1) centerOfGravityClusti=clusti - else centerOfGravityClusti=colMeans(clusti) - centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityClusti) - sizeClusti[k]=length(which(clusters$clustering==k)) - if(length(which(clusters$clustering==k))==1) withinVarClusters[k]=0 - else withinVarClusters[k]=sum(apply(clusti,1,function(x) withinVar(x,centerOfGravityClusti))) - - } - # Between variance - 
classifBetweenVar=(1/nbLog)*sum(sizeClusti*((dist(centerOfGravityClassif)[1:nbClust])^2)) - # Within variance of clusters on totale variance (pourcent) and between variance on totale variance of classification - withinVarClusterOnTot=(1/nbLog)*sum(withinVarClusters)/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 - betweenVarClassifOnTot=classifBetweenVar/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 - - - # Compute the test-values for species - resval=test.values(clusters$clustering,datSpecies) - # Determine the target species - target=targetspecies(resval) - nbClust=length(clusters$id.med) - rownames(target$tabnomespcib)=paste("Cluster",1:nbClust) - - - # Compute the percentage of logevents catching each species by cluster - mainSpecies=colnames(datSpecies) - percLogevents=matrix(0,ncol=length(mainSpecies),nrow=nbClust,dimnames=list(paste("Cluster ",1:nbClust,sep=""),mainSpecies)) - for(i in 1:nbClust){ - percLogevents[i,]=round(sapply(mainSpecies,function(x) (clusters$clusinfo[i,1]-length(which(datSpecies[clusters$clustering==i,x]==0)))/clusters$clusinfo[i,1]*100),digits=1) - } - - - # Projections on the first factorial plans - png(paste(analysisName,"Projections.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(2,3)) - plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="Projection of PAM classification on the factorial plan 1-2", xlab="axis 1", ylab="axis 2") - if(dim(datLog)[2]>2) { - plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="Projection of PAM classification on the factorial plan 2-3", xlab="axis 2", ylab="axis 3") - plot(datLog[,1], datLog[,3], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="Projection of PAM classification on the factorial plan 1-3", xlab="axis 1", ylab="axis 3") - if(dim(datLog)[2]>3) { - plot(datLog[,1], datLog[,4], pch=21, 
bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="Projection of PAM classification on the factorial plan 1-4", xlab="axis 1", ylab="axis 4") - plot(datLog[,2], datLog[,4], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="Projection of PAM classification on the factorial plan 2-4", xlab="axis 2", ylab="axis 4") - plot(datLog[,3], datLog[,4], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="Projection of PAM classification on the factorial plan 3-4", xlab="axis 3", ylab="axis 4") - }} - par(op) - dev.off() - - - # For a paper - X11(5,5) - plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="", xlab="Axis 1", ylab="Axis 2") - abline(h=0, lty=2) ; abline(v=0, lty=2) - savePlot(filename=paste(analysisName,'projections_1_2_PAM',sep="_"), type='png') - dev.off() - - X11(5,5) - plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="", xlab="Axis 1", ylab="Axis 2") - abline(h=0, lty=2) ; abline(v=0, lty=2) - savePlot(filename=paste(analysisName,'projections_1_2_PAM',sep="_"), type='png') - dev.off() - - X11(5,5) - plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="", xlab="Axis 2", ylab="Axis 3") - abline(h=0, lty=2) ; abline(v=0, lty=2) - savePlot(filename=paste(analysisName,'projections_2_3_PAM',sep="_"), type='png') - dev.off() - - - # Catch profile of the dataset - meanprofile=colMeans(datSpecies) - png(paste(analysisName,"Catch profile of the dataset.png",sep="_"), width = 1200, height = 800) - op <- par(las=2) - barplot(meanprofile, main="Catch profile of the dataset", xlab="Species", ylab="Percentage of catch") - par(op) - mtext(paste(nrow(datSpecies)," logevents"), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - dev.off() - - - # Catch profile by cluster - nbSpec=ncol(datSpecies) - 
summaryClusters=array(0,dim=c(6,nbSpec,nbClust)) - dimnames(summaryClusters)[[1]]=c("Min.","1st Qu.","Median", "Mean", "3rd Qu.", "Max.") - dimnames(summaryClusters)[[2]]=names(meanprofile) - dimnames(summaryClusters)[[3]]=paste("Cluster",1:nbClust) - for(i in 1:nbClust){ - if(clusters$clusinfo[i,1]==1){ - summaryClusters[,,i]=apply(t(as.matrix(datSpecies[which(clusters$clustering==i),])),2, - function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) - }else{ - summaryClusters[,,i]=apply(datSpecies[which(clusters$clustering==i),],2, - function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) - } - } - # Species names for catch profile plots - nameSpPlot=character() - catchMeanThreshold=2 - for(i in 1:nbClust){ - namSpi=names(meanprofile[which(t(summaryClusters["Mean",,i])>catchMeanThreshold)]) - numSpi=which(t(summaryClusters["Mean",,i])>catchMeanThreshold) - nameSpPloti=rep("",nbSpec) - nameSpPloti[numSpi]=namSpi - nameSpPlot=rbind(nameSpPlot,nameSpPloti) - } - png(paste(analysisName,"Catch profile by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(i in 1:nbClust){ - op2 <- par(las=2) - barplot(t(summaryClusters["Mean",,i]), names.arg=nameSpPlot[i,], xlab="Species", ylab="Percentage of catch", col="gray") - par(op2) - mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Catch profile by cluster","\n","\n",sep="")) - dev.off() - - - # For a paper : levelplot - X11(4,4) - mat <- t(summaryClusters["Mean",,]) - #rownames(mat) <- c("I","II","III","IV","V","VI","VII","VIII","IX","X","XI","XII","XIII","XIV","XV", - # "XVI","XVII","XVIII","XIX","XX")[1:nrow(mat)] - rownames(mat) <- 
c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20")[1:nrow(mat)] - sp <- apply(mat,2, function(x) length(which(x>20))) - sp2=sp[which(sp>=1)] - #colnames(mat)[sp==0] <- "" - mat2=mat[,which(colnames(mat)%in%names(sp2))] - cc <- colorRampPalette(c("white", "black"),space = "rgb", interpolate="spline") - print(levelplot(mat2, cuts=4, aspect=1, xlab="", ylab="", col.regions=cc(5), at=c(0,20,40,60,80,100), scales=list(cex=0.7), colorkey=list(space="right", at=c(0,20,40,60,80,100), width=1.1))) - savePlot(filename=paste(analysisName,'mean_profile_by_cluster_levelplot',sep="_"), type='png') - dev.off() - - - # Standard deviation profile by cluster - sdprofil=matrix(0,nrow=nbClust,ncol=nbSpec) - namSdPlot=character() - SdThreshold=2 - for(i in 1:nbClust){ - if(length(which(clusters$clustering==i))==1){ sdprofilclusti=rep(0,nbSpec) - }else{sdprofilclusti=apply(datSpecies[which(clusters$clustering==i),],2,sd)} - namSDi=names(which(sdprofilclusti>SdThreshold)) - numSDi=which(sdprofilclusti>SdThreshold) - namSdPloti=rep("",nbSpec) - namSdPloti[numSDi]=namSDi - sdprofil[i,]=sdprofilclusti - namSdPlot=rbind(namSdPlot,namSdPloti) - } - rownames(sdprofil) <- 1:nrow(sdprofil) - png(paste(analysisName,"Standard deviation profile by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(i in 1:nbClust){ - op2 <- par(las=2) - barplot(sdprofil[i,], names.arg=namSdPlot[i,], xlab="Species", ylab="Percentage of catch") - par(op2) - mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Standard deviation profile by cluster","\n","\n",sep="")) - dev.off() - - - # Number of Logevents by cluster - x=c(1:nbClust) - png(paste(analysisName,"Number of Logevents by cluster.png",sep="_"), width = 1200, height = 800) - coord=barplot(clusters$clusinfo[,1], names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", 
ylab="Number of Logevents") - barplot(clusters$clusinfo[,1], names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents", col="skyblue") - text(coord,clusters$clusinfo[,1]+200,clusters$clusinfo[,1],font=2,xpd=NA) - dev.off() - - - # Profile of test-values by cluster - targetresval=numeric() - nameTargetPlot=character() - for(i in 1:nbClust){ - nomtargeti=as.character(target$tabnomespcib[i,which(!is.na(target$tabnumespcib[i,]))]) - numtargeti=as.numeric(target$tabnumespcib[i,which(!is.na(target$tabnumespcib[i,]))]) - nameTargetPloti=rep("",nbSpec) - nameTargetPloti[numtargeti]=nomtargeti - nameTargetPlot=rbind(nameTargetPlot,nameTargetPloti) - targetresvalclusti=rep(0,nbSpec) - targetresvalclusti[numtargeti]=resval[nomtargeti,i] - targetresval=rbind(targetresval,targetresvalclusti) - } - - png(paste(analysisName,"Profile of test-values by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(i in 1:nbClust){ - op2 <- par(las=2) - barplot(targetresval[i,],names.arg=nameTargetPlot[i,], xlab="Species", ylab="Test-value") - par(op2) - mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Profile of test-values by cluster","\n","\n",sep="")) - dev.off() - - - - # Descriptive tables of the clusters - clusterDesc=matrix(0,nrow=9,ncol=nbClust) - for(i in 1:nbClust){ - clusterDesc[,i]=c(i, - length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<50))+1, - length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<90))+1, - length(which(t(summaryClusters["Median",,i])>50)), - length(which(resval[,i]>1.96)), - length(which(resval[,i]>3.29)), - length(which(apply(datSpecies,2,function (x) (clusters$clusinfo[i,1]-length(which(x[clusters$clustering==i]==0)))/clusters$clusinfo[i,1]*100)>50)), - 
length(which(apply(datSpecies,2,function (x) (clusters$clusinfo[i,1]-length(which(x[clusters$clustering==i]==0)))/clusters$clusinfo[i,1]*100)>90)), - clusters$clusinfo[i,1]) - } - rownames(clusterDesc)=c("Number of species", - "to have 50% of catch", "to have 90% of catch", - "with a median higher than 50", - "with a test-value > 1.96", "with a test-value > 3.29", - "catch in 50% of the logevents", "catch in 90% of the logevents", - "Clusters size") - colnames(clusterDesc)=1:nbClust - clusterDesc2=as.data.frame(clusterDesc) - - - # Summary tables of the clusters - namesSpecies=matrix(NA,nrow=nbClust,ncol=10) - namesCapt=matrix(NA,nrow=nbClust,ncol=5) - nbSpeciesCatch = min(5,dim(t(summaryClusters["Mean",,]))[[2]]) - namesTarget=matrix(NA,nrow=nbClust,ncol=5) - nbSpeciesVT = min(5,dim(target$tabnomespcib)[[2]]) - tabLibname=matrix(NA,nrow=nbClust,ncol=10) - listLibname=list() - - for(i in 1:nbClust){ - namesCapt[i,]=colnames(t(summaryClusters["Mean",,i]))[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)][1:nbSpeciesCatch] - a=as.data.frame(t(summaryClusters["Mean",target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])],i])) - colnames(a)= target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])] - if(length(a)!=0){ - namesTarget[i,1:length(target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])])]=colnames(a[order(a,decreasing=TRUE)]) - } - namesSpecies[i,1:length(union(namesCapt[i,],namesTarget[i,]))]=union(namesCapt[i,],namesTarget[i,]) - } - - for(i in 1:nbClust){ - listLibname[[i]]=lapply(as.list(namesSpecies[i,]), function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0) "NA" - else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"French_name"]) - tabLibname[i,]=unlist(lapply(listLibname[[i]], function(x) as.character(unlist(x)))) - } - - tabPropCatch=matrix(NA,nrow=nbClust,ncol=10) - tabTestVal=matrix(NA,nrow=nbClust,ncol=10) - 
tabPropLog=matrix(NA,nrow=nbClust,ncol=10) - - for(i in 1:nbClust){ - print("-----------------------------------------------------------------") - print(paste("Cluster",i)) - propCatch=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] - tabPropCatch[i,1:length(propCatch)]=propCatch - print(propCatch) - testVal=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) resval[x,i]),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] - tabTestVal[i,1:length(testVal)]=testVal - print(testVal) - propLog=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (clusters$clusinfo[i,1]-length(which(datSpecies[clusters$clustering==i,x]==0)))/clusters$clusinfo[i,1]*100),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (clusters$clusinfo[i,1]-length(which(datSpecies[clusters$clustering==i,x]==0)))/clusters$clusinfo[i,1]*100),digits=1)>=0.1)] - tabPropLog[i,1:length(propLog)]=propLog - print(propLog) - } - - tabClusters=array(0,dim=c(10,5,nbClust)) - dimnames(tabClusters)[[2]]=c("Libname","FAO","Test-value","% Catch","% Logevents") - dimnames(tabClusters)[[3]]=paste("Cluster",1:nbClust) - for(i in 1:nbClust){ - tabClusters[,,i]=cbind(tabLibname[i,],namesSpecies[i,],tabTestVal[i,],tabPropCatch[i,],tabPropLog[i,]) - } - - sizeTabClusters=numeric() - for(i in 1:nbClust){ - sizeTabClusters[i]=min(length(namesSpecies[i,!is.na(namesSpecies[i,])]),length(tabPropCatch[i,!is.na(tabPropCatch[i,])]),length(tabTestVal[i,!is.na(tabTestVal[i,])]),length(tabPropLog[i,!is.na(tabPropLog[i,])])) - } - - - # Target Species - # Intersection of species from tabClusters having : - % Cumulated Catch > thresholdCatch - # - Test-value > thresholdTestValue - # - % Logevents > 
thresholdLogevents - thresholdCatch=75 - thresholdTestValue=3 - thresholdLogevents=30 - - sppCumCatch=list() - sppTestValue=list() - sppLogevents=list() - targetSpeciesByCluster=list() - - for (i in 1:nbClust){ - percCatchCum=cumsum(as.numeric(tabClusters[,"% Catch",i])) - nbSpSel=length(which(percCatchCumthresholdTestValue),"FAO",i] - - sppLogevents[[i]]=tabClusters[which(as.numeric(tabClusters[,"% Logevents",i])>thresholdLogevents),"FAO",i] - - targetSpeciesByCluster[[i]]=intersect(sppCumCatch[[i]],sppTestValue[[i]]) - targetSpeciesByCluster[[i]]=intersect(targetSpeciesByCluster[[i]],sppLogevents[[i]]) - } - - # List of metiers (level 7) - listMetiersL7=list() - for (i in 1:nbClust){ - metiersClusteri=targetSpeciesByCluster[[i]] - metiersClusteri=as.character(unique(unlist(metiersClusteri))) - metiersClusteri=paste(unlist(strsplit(metiersClusteri," ")),collapse=" ") - listMetiersL7[[i]]=metiersClusteri - } - - # Metier (level 7) of each logevent - metierByLogeventL7=unlist(sapply(clusters$clustering,function(x) listMetiersL7[[x]])) - - - # Create csv tables - write.table(clusterDesc2,file="descClusters.csv",col.names=FALSE,sep=";") - - dfClust=data.frame() - dfClust=paste("Clust ",1:nbClust,sep="") - for(i in 1:nbClust){ - write.table(dfClust[i],file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") - tabClusti=as.data.frame(tabClusters[1:sizeTabClusters[i],,i]) - write.table(tabClusti,file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") - } - - - LE_ID_clust=data.frame(LE_ID=LE_ID,clust=metierByLogeventL7) - print(" --- end of step 3 ---") - print(Sys.time()-t1) - - return(list(LE_ID_clust=LE_ID_clust, clusters=clusters, - betweenVarClassifOnTot=betweenVarClassifOnTot, nbClust=nbClust, - summaryClusters=summaryClusters, testValues=resval, - testValuesSpecies=target$tabnomespcib, percLogevents=percLogevents, - descClusters=clusterDesc2, tabClusters=tabClusters, - targetSpecies=targetSpeciesByCluster)) - - } else - - - - - 
-######################################################################################################################################## CLARA - - if(methMetier=="clara"){ - nbLog=nrow(datLog) - propSample=0.1 - - # Calculation of optimal k thanks to the silhouette (second maximum) - clustersClara.silcoeff=numeric() - clustersClara.silcoeff[1]=0 - clustersClara.silcoeff[2]=0 - clustersClara.silcoeff[3]=0 - k=2 - compMax=1 - repeat{ - k=k+2 - print(k) - clustersClara=clara(datLog, k, metric=param1, stand=FALSE, samples=5, sampsize=min(nbLog,round(propSample*nbLog+10*k))) - clustersClara.silcoeff[k]=clustersClara$silinfo$avg.width - clustersClara=clara(datLog, k+1, metric=param1, stand=FALSE, samples=5, sampsize=min(nbLog,round(propSample*nbLog+10*(k+1)))) - clustersClara.silcoeff[k+1]=clustersClara$silinfo$avg.width - if((clustersClara.silcoeff[k-2]clustersClara.silcoeff[k]) & compMax<=2){ - if(compMax==2){ - nbClust=k-1 - print(paste("2e max =",k-1)) - print(paste("nbClust =",nbClust)) - break - } else { - compMax=compMax+1 - print(paste("compMax1 =",compMax)) - print(paste("1er max =",k-1)) - } - } - if((clustersClara.silcoeff[k-1]clustersClara.silcoeff[k+1]) & compMax<=2){ - if(compMax==2){ - nbClust=k - print(paste("2e max =",k)) - print(paste("nbClust =",nbClust)) - break - } else { - compMax=compMax+1 - print(paste("compMax2 =",compMax)) - print(paste("1er max =",k)) - } - } - Store(objects()) - gc(reset=TRUE) - } - - - png(paste(analysisName,"Silhouette of the classification.png",sep="_"), width = 1200, height = 800) - plot(clustersClara.silcoeff, main="Silhouette of the classification", xlab="Number of clusters", ylab="Silhouette") # k optimal corresponds to maximum of silhouette's coefficients - dev.off() - - Store(objects()) - gc(reset=TRUE) - - cat("ClaraSilCoeff",clustersClara.silcoeff,"\n") - - - # CLARA with optimal k - clusters=clara(datLog, nbClust, metric=param1, stand=FALSE, samples=5, sampsize=min(nbLog,round(propSample*nbLog+10*nbClust))) # 
CLARA with optimal k - summary(clusters) - - - # Within and between variance of clusters and classification - centerOfGravityClassif=numeric() - withinVarClusters=numeric() - sizeClusti=numeric() - centerOfGravityDatLog=colMeans(datLog) - centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityDatLog) - for(k in 1:nbClust){ # Within variance by cluster - - clusti=datLog[which(clusters$clustering==k),] - if(length(which(clusters$clustering==k))==1) centerOfGravityClusti=clusti - else centerOfGravityClusti=colMeans(clusti) - centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityClusti) - sizeClusti[k]=length(which(clusters$clustering==k)) - if(length(which(clusters$clustering==k))==1) withinVarClusters[k]=0 - else withinVarClusters[k]=sum(apply(clusti,1,function(x) withinVar(x,centerOfGravityClusti))) - - } - # Between variance - classifBetweenVar=(1/nbLog)*sum(sizeClusti*((dist(centerOfGravityClassif)[1:nbClust])^2)) - # Within variance of clusters on totale variance (pourcent) and between variance on totale variance of classification - withinVarClusterOnTot=(1/nbLog)*sum(withinVarClusters)/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 - betweenVarClassifOnTot=classifBetweenVar/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 - - - # Compute the test-values for species - resval=test.values(clusters$cluster,datSpecies) - # Determine the target species - target=targetspecies(resval) - rownames(target$tabnomespcib)=paste("Cluster",1:nbClust) - - - # Compute the percentage of logevents catching each species by cluster - mainSpecies=colnames(datSpecies) - percLogevents=matrix(0,ncol=length(mainSpecies),nrow=nbClust,dimnames=list(paste("Cluster ",1:nbClust,sep=""),mainSpecies)) - for(i in 1:nbClust){ - percLogevents[i,]=round(sapply(mainSpecies,function(x) (clusters$clusinfo[i,1]-length(which(datSpecies[clusters$clustering==i,x]==0)))/clusters$clusinfo[i,1]*100),digits=1) - } - - - # Projections on the first factorial plans - 
png(paste(analysisName,"Projections.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(2,3)) - plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="Projection of CLARA classification on the factorial plan 1-2", xlab="axis 1", ylab="axis 2") - if(dim(datLog)[2]>2) { - plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="Projection of CLARA classification on the factorial plan 2-3", xlab="axis 2", ylab="axis 3") - plot(datLog[,1], datLog[,3], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="Projection of CLARA classification on the factorial plan 1-3", xlab="axis 1", ylab="axis 3") - if(dim(datLog)[2]>3) { - plot(datLog[,1], datLog[,4], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="Projection of CLARA classification on the factorial plan 1-4", xlab="axis 1", ylab="axis 4") - plot(datLog[,2], datLog[,4], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="Projection of CLARA classification on the factorial plan 2-4", xlab="axis 2", ylab="axis 4") - plot(datLog[,3], datLog[,4], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="Projection of CLARA classification on the factorial plan 3-4", xlab="axis 3", ylab="axis 4") - }} - par(op) - dev.off() - - - # For a paper - X11(5,5) - plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="", xlab="Axis 1", ylab="Axis 2") - abline(h=0, lty=2) ; abline(v=0, lty=2) - savePlot(filename=paste(analysisName,'projections_1_2_CLARA',sep="_"), type='png') - dev.off() - - X11(5,5) - plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="", xlab="Axis 1", ylab="Axis 2") - abline(h=0, lty=2) ; abline(v=0, lty=2) - 
savePlot(filename=paste(analysisName,'projections_1_2_CLARA',sep="_"), type='png') - dev.off() - - X11(5,5) - plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="", xlab="Axis 2", ylab="Axis 3") - abline(h=0, lty=2) ; abline(v=0, lty=2) - savePlot(filename=paste(analysisName,'projections_2_3_CLARA',sep="_"), type='png') - dev.off() - - # Catch profile of the dataset - meanprofile=colMeans(datSpecies) - png(paste(analysisName,"Catch profile of the dataset.png",sep="_"), width = 1200, height = 800) - op <- par(las=2) - barplot(meanprofile, main="Catch profile of the dataset", xlab="Species", ylab="Percentage of catch") - par(op) - mtext(paste(nrow(datSpecies)," logevents"), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - dev.off() - - - # Catch profile by cluster - nbSpec=ncol(datSpecies) - summaryClusters=array(0,dim=c(6,nbSpec,nbClust)) - dimnames(summaryClusters)[[1]]=c("Min.","1st Qu.","Median", "Mean", "3rd Qu.", "Max.") - dimnames(summaryClusters)[[2]]=names(meanprofile) - dimnames(summaryClusters)[[3]]=paste("Cluster",1:nbClust) - for(i in 1:nbClust){ - if(clusters$clusinfo[i,1]==1){ - summaryClusters[,,i]=apply(t(as.matrix(datSpecies[which(clusters$clustering==i),])),2, - function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) - }else{ - summaryClusters[,,i]=apply(datSpecies[which(clusters$clustering==i),],2, - function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) - } - } - # Species names for catch profile plots - nameSpPlot=character() - catchMeanThreshold=2 - for(i in 1:nbClust){ - namSpi=names(meanprofile[which(t(summaryClusters["Mean",,i])>catchMeanThreshold)]) - numSpi=which(t(summaryClusters["Mean",,i])>catchMeanThreshold) - nameSpPloti=rep("",nbSpec) - nameSpPloti[numSpi]=namSpi 
- nameSpPlot=rbind(nameSpPlot,nameSpPloti) - } - # Plot - png(paste(analysisName,"Catch profile by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(i in 1:nbClust){ - op2 <- par(las=2) - barplot(t(summaryClusters["Mean",,i]), names.arg=nameSpPlot[i,], xlab="Species", ylab="Percentage of catch", col="gray") - par(op2) - mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Catch profile by cluster","\n","\n",sep="")) - dev.off() - - - # For a paper : levelplot - X11(4,4) - mat <- t(summaryClusters["Mean",,]) - #rownames(mat) <- c("I","II","III","IV","V","VI","VII","VIII","IX","X","XI","XII","XIII","XIV","XV", - # "XVI","XVII","XVIII","XIX","XX")[1:nrow(mat)] - rownames(mat) <- c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20")[1:nrow(mat)] - sp <- apply(mat,2, function(x) length(which(x>20))) - sp2=sp[which(sp>=1)] - #colnames(mat)[sp==0] <- "" - mat2=mat[,which(colnames(mat)%in%names(sp2))] - cc <- colorRampPalette(c("white", "black"),space = "rgb", interpolate="spline") - print(levelplot(mat2, cuts=4, aspect=1, xlab="", ylab="", col.regions=cc(5), at=c(0,20,40,60,80,100), scales=list(cex=0.7), colorkey=list(space="right", at=c(0,20,40,60,80,100), width=1.1))) - savePlot(filename=paste(analysisName,'mean_profile_by_cluster_levelplot',sep="_"), type='png') - dev.off() - - # OR # - mat <- t(summaryClusters["Mean",,]) - #mat=mat[,order(colnames(mat),decreasing=TRUE)] # si on veut mettre les esp�ces par ordre descendant - rownames(mat) <- c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20")[1:nrow(mat)] - sp <- apply(mat,2, function(x) length(which(x>20))) - colnames(mat)[sp==0] <- "" - cc <- colorRampPalette(c("white", "steelblue2", "blue4"),space = "rgb", interpolate="spline") - png(filename = 
paste(paste(analysisName,'mean_profile_by_cluster_levelplot_blue',sep="_"),".png",sep=""), width = 400, height = 800) - print(levelplot(mat, cuts=4, aspect=3, xlab="", ylab="", col.regions=cc(5), at=c(0,20,40,60,80,100), scales=list(cex=0.8), colorkey=list(space="right", at=c(0,20,40,60,80,100)))) - dev.off() - - - # Standard deviation profile by cluster - sdprofil=matrix(0,nrow=nbClust,ncol=nbSpec) - namSdPlot=character() - SdThreshold=5 - for(i in 1:nbClust){ - if(length(which(clusters$clustering==i))==1){ sdprofilclusti=rep(0,nbSpec) - }else{sdprofilclusti=apply(datSpecies[which(clusters$clustering==i),],2,sd)} - namSDi=names(which(sdprofilclusti>SdThreshold)) - numSDi=which(sdprofilclusti>SdThreshold) - namSdPloti=rep("",nbSpec) - namSdPloti[numSDi]=namSDi - sdprofil[i,]=sdprofilclusti - namSdPlot=rbind(namSdPlot,namSdPloti) - } - rownames(sdprofil) <- 1:nrow(sdprofil) - png(paste(analysisName,"Standard deviation profile by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(i in 1:nbClust){ - op2 <- par(las=2) - barplot(sdprofil[i,], names.arg=namSdPlot[i,], xlab="Species", ylab="Percentage of catch") - par(op2) - mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Standard deviation profile by cluster","\n","\n",sep="")) - dev.off() - - - # Number of Logevents by cluster - x=c(1:nbClust) - png(paste(analysisName,"Number of Logevents by cluster.png",sep="_"), width = 1200, height = 800) - coord=barplot(clusters$clusinfo[,1], names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents") - barplot(clusters$clusinfo[,1], names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents", col="skyblue") - text(coord,clusters$clusinfo[,1]+5,clusters$clusinfo[,1],font=2,xpd=NA) - dev.off() - - - # Profile of test-values by cluster - 
targetresval=matrix(0,nrow=nbClust,ncol=nbSpec) - colnames(targetresval)=colnames(datSpecies) - rownames(targetresval)=1:nbClust - nameTargetPlot=matrix(NA,nrow=nbClust,ncol=nbSpec) - for(i in 1:nbClust){ - nomtargeti=as.character(target$tabnomespcib[i,which(!is.na(target$tabnumespcib[i,]))]) - numtargeti=as.numeric(target$tabnumespcib[i,which(!is.na(target$tabnumespcib[i,]))]) - nameTargetPlot[i,numtargeti]=nomtargeti - targetresval[i,numtargeti]=resval[nomtargeti,i] - } - - png(paste(analysisName,"Profile of test-values by cluster.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) - for(i in 1:nbClust){ - op2 <- par(las=2) - barplot(targetresval[i,],names.arg=nameTargetPlot[i,], xlab="Species", ylab="Test-value") - par(op2) - mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") - } - par(op) - title(main=paste("Profile of test-values by cluster","\n","\n",sep="")) - dev.off() - - - - # Descriptive tables of the clusters - clusterDesc=matrix(0,nrow=9,ncol=nbClust) - for(i in 1:nbClust){ - clusterDesc[,i]=c(i, - length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<50))+1, - length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<90))+1, - length(which(t(summaryClusters["Median",,i])>50)), - length(which(resval[,i]>1.96)), - length(which(resval[,i]>3.29)), - length(which(apply(datSpecies,2,function (x) (clusters$clusinfo[i,1]-length(which(x[clusters$clustering==i]==0)))/clusters$clusinfo[i,1]*100)>50)), - length(which(apply(datSpecies,2,function (x) (clusters$clusinfo[i,1]-length(which(x[clusters$clustering==i]==0)))/clusters$clusinfo[i,1]*100)>90)), - clusters$clusinfo[i,1]) - } - rownames(clusterDesc)=c("Number of species", - "to have 50% of catch", "to have 90% of catch", - "with a median higher than 50", - "with a test-value > 1.96", "with a test-value > 3.29", - "catch in 50% 
of the logevents", "catch in 90% of the logevents", - "Clusters size") - colnames(clusterDesc)=1:nbClust - clusterDesc2=as.data.frame(clusterDesc) - - - # Summary tables of the clusters - namesSpecies=matrix(NA,nrow=nbClust,ncol=10) - namesCapt=matrix(NA,nrow=nbClust,ncol=5) - nbSpeciesCatch = min(5,dim(t(summaryClusters["Mean",,]))[[2]]) - namesTarget=matrix(NA,nrow=nbClust,ncol=5) - nbSpeciesVT = min(5,dim(target$tabnomespcib)[[2]]) - tabLibname=matrix(NA,nrow=nbClust,ncol=10) - listLibname=list() - - for(i in 1:nbClust){ - namesCapt[i,]=colnames(t(summaryClusters["Mean",,i]))[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)][1:nbSpeciesCatch] - a=as.data.frame(t(summaryClusters["Mean",target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])],i])) - colnames(a)= target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])] - if(length(a)!=0){ - namesTarget[i,1:length(target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])])]=colnames(a[order(a,decreasing=TRUE)]) - } - namesSpecies[i,1:length(union(namesCapt[i,],namesTarget[i,]))]=union(namesCapt[i,],namesTarget[i,]) - } - - for(i in 1:nbClust){ - listLibname[[i]]=lapply(as.list(namesSpecies[i,]), function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0) "NA" - else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"French_name"]) - tabLibname[i,]=unlist(lapply(listLibname[[i]], function(x) as.character(unlist(x)))) - } - - tabPropCatch=matrix(NA,nrow=nbClust,ncol=10) - tabTestVal=matrix(NA,nrow=nbClust,ncol=10) - tabPropLog=matrix(NA,nrow=nbClust,ncol=10) - - for(i in 1:nbClust){ - print("-----------------------------------------------------------------") - print(paste("Cluster",i)) - propCatch=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] 
- tabPropCatch[i,1:length(propCatch)]=propCatch - print(propCatch) - testVal=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) resval[x,i]),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] - tabTestVal[i,1:length(testVal)]=testVal - print(testVal) - propLog=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (clusters$clusinfo[i,1]-length(which(datSpecies[clusters$clustering==i,x]==0)))/clusters$clusinfo[i,1]*100),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (clusters$clusinfo[i,1]-length(which(datSpecies[clusters$clustering==i,x]==0)))/clusters$clusinfo[i,1]*100),digits=1)>=0.1)] - tabPropLog[i,1:length(propLog)]=propLog - print(propLog) - } - - tabClusters=array(0,dim=c(10,5,nbClust)) - dimnames(tabClusters)[[2]]=c("Libname","FAO","Test-value","% Catch","% Logevents") - dimnames(tabClusters)[[3]]=paste("Cluster",1:nbClust) - for(i in 1:nbClust){ - tabClusters[,,i]=cbind(tabLibname[i,],namesSpecies[i,],tabTestVal[i,],tabPropCatch[i,],tabPropLog[i,]) - } - - sizeTabClusters=numeric() - for(i in 1:nbClust){ - sizeTabClusters[i]=min(length(namesSpecies[i,!is.na(namesSpecies[i,])]),length(tabPropCatch[i,!is.na(tabPropCatch[i,])]),length(tabTestVal[i,!is.na(tabTestVal[i,])]),length(tabPropLog[i,!is.na(tabPropLog[i,])])) - } - - - # Target Species - # Intersection of species from tabClusters having : - % Cumulated Catch > thresholdCatch - # - Test-value > thresholdTestValue - # - % Logevents > thresholdLogevents - thresholdCatch=75 - thresholdTestValue=3 - thresholdLogevents=30 - - sppCumCatch=list() - sppTestValue=list() - sppLogevents=list() - targetSpeciesByCluster=list() - - for (i in 1:nbClust){ - percCatchCum=cumsum(as.numeric(tabClusters[,"% Catch",i])) - nbSpSel=length(which(percCatchCumthresholdTestValue),"FAO",i] - - sppLogevents[[i]]=tabClusters[which(as.numeric(tabClusters[,"% 
Logevents",i])>thresholdLogevents),"FAO",i] - - targetSpeciesByCluster[[i]]=intersect(sppCumCatch[[i]],sppTestValue[[i]]) - targetSpeciesByCluster[[i]]=intersect(targetSpeciesByCluster[[i]],sppLogevents[[i]]) - } - - # List of metiers (level 7) - listMetiersL7=list() - for (i in 1:nbClust){ - metiersClusteri=targetSpeciesByCluster[[i]] - metiersClusteri=as.character(unique(unlist(metiersClusteri))) - metiersClusteri=paste(unlist(strsplit(metiersClusteri," ")),collapse=" ") - listMetiersL7[[i]]=metiersClusteri - } - - # Metier (level 7) of each logevent - metierByLogeventL7=unlist(sapply(clusters$clustering,function(x) listMetiersL7[[x]])) - - - - # Create csv tables - write.table(clusterDesc2,file="descClusters.csv",col.names=FALSE,sep=";") - - dfClust=data.frame() - dfClust=paste("Clust ",1:nbClust,sep="") - for(i in 1:nbClust){ - write.table(dfClust[i],file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") - tabClusti=as.data.frame(tabClusters[1:sizeTabClusters[i],,i]) - write.table(tabClusti,file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") - } - - - LE_ID_clust=data.frame(LE_ID=LE_ID,clust=metierByLogeventL7) - print(" --- end of step 3 ---") - print(Sys.time()-t1) - - return(list(LE_ID_clust=LE_ID_clust, clusters=clusters, - betweenVarClassifOnTot=betweenVarClassifOnTot, nbClust=nbClust, - summaryClusters=summaryClusters, testValues=resval, - testValuesSpecies=target$tabnomespcib, percLogevents=percLogevents, - descClusters=clusterDesc2, tabClusters=tabClusters, - targetSpecies=targetSpeciesByCluster)) - - } else stop("methMetier must be hac, kmeans, pam or clara") - # end of the methods - - -} # end of the function "getMetierClusters" - +################################################################################ +# STEP 3 OF THE MULTIVARIATE CLASSIFICATION : # +# RUN THE CLUSTERING OF THE LOGEVENTS # +# 4 METHODS ARE AVALAIBLE : HAC / KMEANS / PAM / CLARA # +################################################################################ + 
+ + + +#' Finding metiers from a reduced EFLALO dataset, step 3: clustering logevents +#' using various multivariate methods +#' +#' This function represents the third step in the workflow processing logbooks +#' data for identifying metiers. +#' +#' This step allows applying various clustering analyses on the data sets +#' coming out of the first and second step. All methods will lead to a +#' classification of all individuals (logevents), but they differ in their +#' nature and then consequently in their outcomes. The four methods available +#' are - Hierarchical Ascending Classification (HAC), with user-defined method +#' for estimating distances and link for aggregating individuals\cr - +#' K-Means,\cr - Partitioning Around Medoids (PAM),\cr - Clustering LARge +#' Applications (CLARA). +#' +#' The HAC method works by calculating the distance between individuals using +#' the method selected with param1 ("euclidean", "maximum", "manhattan", +#' "canberra", "binary" or "minkowski") and aggregating them based on the +#' distance between clusters, using the link selected with param2 ("ward", +#' "single", "complete", "average", "mcquitty", "median" or "centroid"). In +#' HAC, the number of classes is determined afterwards, once all combinations +#' have been calculated, by using the objective criteria of scree test which +#' detects the third marginal loss of inertia between two consecutive numbers +#' of classes. Therefore, the computing time and memory request for this method +#' can be quite comprehensive, and may reach memory limits on standard PC when +#' operating with very large datasets. +#' +#' The K-Means method works by randomly choosing k individuals, or kernels (k +#' corresponding to the final number of classes), and then affecting each +#' individuals of the dataset to the closest kernel. Each time, the gravity +#' center of the class is recalculated, thus reinitialising the calculation of +#' distances to the next individual. 
In order to define the most appropriate +#' number of classes, this procedure is repeated with differents values for k, +#' from 2 to 15. The final number of classes is identified by using the +#' criteria of scree test which detects the third marginal loss of inertia +#' between two consecutive numbers of classes. +#' +#' The PAM method works slightly around the same principle, starting with the +#' initialisation of k medoids. The medoid is the individual in a class which +#' shows least dissimilarity with other individuals in the same class, and the +#' remaining individuals are affected to their closest medoid. Then the sum of +#' dissimilarities is calculated and compared with the sum of dissimilarities +#' if any other individual in the class had been playing the role of the +#' medoid, and then the medoid is eventually adjusted accordingly, until full +#' stabilisation of the procedure. The most appropriate number of classes is +#' identified by using the estimated silhouette of the classification for each +#' value of k. The silhouette represent an average comparison of the distance +#' between an individual and the other individuals from its class, and between +#' the same individual and the other individuals from the next closest class, +#' and is therefore an objective measurement of the quality of the +#' classification. The final number of classes retained is the one for which +#' the second maximum of the silhouettes is reached. It is to be noted that the +#' PAM method is not designed for working with very large datasets (>10 000 +#' lines), and may quickly reach memory limits. +#' +#' The CLARA method is an extension of the PAM algorithm aiming at working with +#' large datasets, and is therefore more efficient and powerful than PAM. It +#' works by sampling 5 subsets of the dataset for each value of k and running +#' the PAM algorithm explained above on these subsets, and then to keep only +#' the subset giving the best classification (silhouette). 
As in PAM, the final +#' number of classes retained is the one for which the second maximum of the +#' silhouettes is reached. Afterwards, all remaining individuals are affected +#' to their closest medoid, using user-defined method (param1) for calculating +#' distances ('euclidean' being used as the default, but 'manhattan' could also +#' be used). +#' +#' +#' @param datSpecies numerical matrix with logevents as rows and species as +#' columns, with percentage values (between 0 and 100) of each species in the +#' logevent catches. Logevent ID (LE_ID) should be as row names. Typically, +#' this input table will be produced from the step 1 of the metier analysis +#' applied on the eflalo initial data, using the function +#' extractTableMainSpecies() +#' @param datLog numerical matrix with logevents as rows, and values to be used +#' for calculating distances between individuals as columns. Typically, this +#' input table is produced by the step 2 of the metier analysis, using the +#' function getTableAfterPCA(). If a PCA was run, selected Principal Components +#' will appear as columns. If no PCA was run, the matrix will be the same as +#' datSpecies, with percentage values by species. +#' @param analysisName character, the name of the run. Used for the file name +#' of the plots. +#' @param methMetier character. The name of the clustering method to be used. +#' Must be chosen between "hac", "kmeans", "pam" and "clara". +#' @param param1 character. Parameter used for chosing the method calculating +#' distances between individuals, to be used in HAC and CLARA algorithms. For +#' HAC, it can be chosen between "euclidean", "maximum", "manhattan", +#' "canberra", "binary" or "minkowski". For CLARA, between "euclidean" and +#' "manhattan". For PAM and K-means, this must be set to NULL. +#' @param param2 character. Parameter used for chosing the method calculating +#' distances between clusters, to be used in HAC algorithm. 
For HAC, it can be +#' chosen between "ward", "single", "complete", "average", "mcquitty", "median" +#' or "centroid". For PAM, CLARA and K-means, this must be set to NULL. +#' @return For any of the four methods used, a number of graphs are produced +#' and saved directly in the working directory. They describe 1) projections +#' of results on factorial plans, 2) the mean profile of the whole dataset, +#' i.e. the average percentage of the various species in a logevent across all +#' individuals, 3) the mean and standard deviation profile in terms of average +#' percentage of catch by species within each cluster, 4) the number of +#' logevents by clusters, and 5) the profile of test-values by cluster. +#' +#' Finally, the function returns a list with a number of results and +#' diagnostics on the performance of the method: \item{LE_ID_clust}{a data +#' frame with two columns, linking the initial ID name of the Logevent (LE_ID) +#' with the cluster metier where the ID has been allocated. } +#' \item{clusters}{diagnostics of the clustering process. It may vary between +#' the four methods. } \item{betweenVarClassifOnTot}{percentage of variance +#' explained by the classification. } \item{nbClust}{final number of clusters +#' retained. } \item{summaryClusters}{array documenting, for each cluster, the +#' minimum, mean, maximum, as well as the 25\%, 50\% and 75\% quantiles values +#' of the percentage of catch by species for the individual logevents in the +#' cluster. } \item{testValues}{matrix of test-values by species and cluster. +#' The test-value measures for each species the difference between the average +#' percentage of catches in the cluster compared to the average percentage of +#' catch in the total dataset, thus large positive values (>1.98) will point +#' out the most characteristic species in the clusters. 
} +#' \item{testValuesSpecies}{a tabulated list ranking the most characteristic +#' species in the clusters (ordering species with a test-value > 1.98 by +#' decreasing order of test-value). } \item{percLogevents}{a matrix giving the +#' percentage of logevents catching each species by cluster. } +#' \item{descClusters}{a data frame giving some descriptive statistics for each +#' cluster, like cluster size, number of species needed to have at least 50\% +#' of the cluster's total catch, number of species with a test-value > 1.98 in +#' the cluster, number of species caught in at least 50\% of the logevents, +#' etc... } \item{tabClusters}{a 3d-array giving a table summarizing for each +#' cluster the most important species (in terms of catch), the associated +#' test-value, and the percentage of logevents of the cluster catching these +#' species. } \item{targetSpecies}{a list giving the target species by +#' cluster. } +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @seealso \code{\link{getEflaloMetierLevel7}}, +#' \code{\link{selectMainSpecies}}, \code{\link{extractTableMainSpecies}}, +#' \code{\link{getMetierClusters}}, \code{\link{getTableAfterPCA}} +#' @references Development of tools for logbook and VMS data analysis. 
Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @examples +#' +#' +#' \dontrun{ +#' +#' data(eflalo) +#' +#' eflalo <- formatEflalo(eflalo) +#' +#' eflalo <- eflalo[eflalo$LE_GEAR=="OTB",] +#' # note that output plots will be sent to getwd() +#' analysisName <- "metier_analysis_OTB" +#' +#' dat <- eflalo[,c("LE_ID",grep("EURO",colnames(eflalo),value=TRUE))] +#' names(dat)[-1] <- unlist(lapply(strsplit(names(dat[,-1]),"_"),function(x) x[[3]])) +#' +#' explo <- selectMainSpecies(dat, analysisName, RunHAC=TRUE, DiagFlag=FALSE) +#' #=> send the LE_ID and LE_KG_SP columns only +#' +#' Step1 <- extractTableMainSpecies(dat, explo$NamesMainSpeciesHAC, +#' paramTotal=95, paramLogevent=100) +#' #=> send the LE_ID and LE_KG_SP columns only +#' +#' rowNamesSave <- row.names(Step1) +#' row.names(Step1) <- 1:nrow(Step1) +#' +#' # Run a PCA +#' Step2 <- getTableAfterPCA(Step1, analysisName, pcaYesNo="pca", +#' criterion="70percents") +#' +#' row.names(Step1) <- rowNamesSave +#' row.names(Step2) <- rowNamesSave +#' +#' # Define a metier for each logevent running the CLARA algorithm +#' Step3 <- getMetierClusters(Step1, Step2, analysisName, +#' methMetier="clara", param1="euclidean", param2=NULL) +#' +#' } +#' +#' +#' @export getMetierClusters +getMetierClusters = function(datSpecies,datLog,analysisName="",methMetier="clara",param1="euclidean",param2=NULL){ + + # Load the table linking 3A-CODE (FAO CODE of species) to the species assemblage (level 5). 
+ data(correspLevel7to5) + require(lattice) + + LE_ID=rownames(datSpecies) + nbSpec=ncol(datSpecies) + datSpecies=as.matrix(datSpecies,ncol=nbSpec,nrow=length(LE_ID)) + + print("######## STEP 3 CLUSTERING ########") + + t1=Sys.time() + print(paste(" --- selected method :",methMetier, " ---")) + + +######################################################################################################################################## HAC + + if(methMetier=="hac"){ + + classifWithinVar=numeric() + classifBetweenVar=numeric() + classifQuality=numeric() + sampleList=numeric() + mProfilSample=numeric() + classifVarExplain=numeric() + + nbLog=nrow(datLog) + nbDim=ncol(datLog) + + # Center of gravity of datLog + centerOfGravityDatLog=colMeans(datLog) + + # HAC like CLARA (HAC on sample, affectation of each logevent to a cluster, quality of classification, do it 5 times, choose the sample which gives the best quality of classification) + print("hac on subsets...") + + for(i in 1:5){ + + numSample=i + print(paste("sample",i)) + # Sample of size 10000 logevents or 30% of all logevents + minsam=min(nbLog,max(10000,round(nbLog*30/100))) + sam=sample(1:nbLog,size=minsam,replace=FALSE) + # Record the 5 samples + sampleList=rbind(sampleList,sam) + outofsam=setdiff(1:nbLog,sam) + sampleDatLog=datLog[sam,] + sampleDatSpecies=datSpecies[sam,] + + # HAC on the sample + log.hac=hcluster(sampleDatLog, method=param1, link=param2) + inerties.vector=log.hac$height[order(log.hac$height,decreasing=TRUE)] + nbClust=which(scree(inerties.vector)[,"epsilon"]<0)[3] + + # Cut the dendogram at the selected level + sampleClusters=cutree(log.hac,k=nbClust) + + Store(objects()) + gc(reset=TRUE) + + # Add the cluster to each logevent of the sample + sampleDatLogWithClusters=cbind(sampleDatLog,sampleClusters) + + sampleClusters=sampleDatLogWithClusters[,ncol(sampleDatLogWithClusters)] + + # Within and between variance of clusters and classification + centerOfGravityClassif=numeric() + 
withinVarClusters=numeric() + sizeClusti=numeric() + centerOfGravitySampleDatLog=colMeans(sampleDatLog) + centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravitySampleDatLog) + for(k in 1:nbClust){ # Within variance by cluster + + clusti=sampleDatLogWithClusters[which(sampleClusters==k),1:nbDim] + if(length(which(sampleClusters==k))==1) centerOfGravityClusti=clusti + else centerOfGravityClusti=colMeans(clusti) + centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityClusti) + sizeClusti[k]=length(which(sampleClusters==k)) + if(length(which(sampleClusters==k))==1) withinVarClusters[k]=0 + else withinVarClusters[k]=sum(apply(clusti,1,function(x) withinVar(x,centerOfGravityClusti))) + + } + # Between variance + classifBetweenVar=(1/nbLog)*sum(sizeClusti*((dist(centerOfGravityClassif)[1:nbClust])^2)) + # Within variance of clusters on totale variance (pourcent) and between variance on totale variance of classification + withinVarClusterOnTot=(1/nbLog)*sum(withinVarClusters)/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 + betweenVarClassifOnTot=classifBetweenVar/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 + classifVarExplain=c(classifVarExplain,betweenVarClassifOnTot) + + + # Catch profile by cluster for each sample + nbSpec=ncol(datSpecies) + mprofil=numeric() + blank=rep(00,nbSpec) + for(k in 1:nbClust){ + mprofilclusti=colMeans(sampleDatSpecies[which(sampleClusters==k),]) + mprofil=rbind(mprofil,mprofilclusti) + } + mprofil=rbind(mprofil,blank) + + mProfilSample=rbind(mProfilSample,mprofil) + + + # Graphics + + # Calculation of each cluster size + sizeClusters=numeric() + for(k in 1:nbClust){ + sizeClusters[k]=length(which(sampleClusters==k)) + } + + # Compute the test-values for species + resval=test.values(sampleClusters,sampleDatSpecies) + # Determine the target species + target=targetspecies(resval) + + + # Projections on the first factorial plans + png(paste(analysisName,numSample,"Sample_Projections.png",sep="_"), 
width = 1200, height = 800) + op <- par(mfrow=c(2,3)) + plot(sampleDatLog[,1], sampleDatLog[,2], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(sampleClusters)], main="Projection of HAC classification on the factorial plan 1-2", xlab="axis 1", ylab="axis 2") + if(dim(datLog)[2]>2) { + plot(sampleDatLog[,2], sampleDatLog[,3], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(sampleClusters)], main="Projection of HAC classification on the factorial plan 2-3", xlab="axis 2", ylab="axis 3") + plot(sampleDatLog[,1], sampleDatLog[,3], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(sampleClusters)], main="Projection of HAC classification on the factorial plan 1-3", xlab="axis 1", ylab="axis 3") + if(dim(datLog)[2]>3) { + plot(sampleDatLog[,1], sampleDatLog[,4], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(sampleClusters)], main="Projection of HAC classification on the factorial plan 1-4", xlab="axis 1", ylab="axis 4") + plot(sampleDatLog[,2], sampleDatLog[,4], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(sampleClusters)], main="Projection of HAC classification on the factorial plan 2-4", xlab="axis 2", ylab="axis 4") + plot(sampleDatLog[,3], sampleDatLog[,4], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(sampleClusters)], main="Projection of HAC classification on the factorial plan 3-4", xlab="axis 3", ylab="axis 4") + }} + par(op) + dev.off() + + + # Rectangles plotting + png(paste(analysisName,numSample,"Sample_Dendogram.png",sep="_"), width = 1200, height = 800) + plclust(log.hac,labels=FALSE,hang=-1,ann=FALSE) + title(main="HAC dendogram",xlab="Logevents",ylab="Height") + rect.hclust(log.hac, k=nbClust) + dev.off() + + + # Catch profile of the dataset + meanprofile=colMeans(sampleDatSpecies) + png(paste(analysisName,numSample,"Sample_Catch profile of the sample.png",sep="_"), width = 1200, height = 800) + op <- par(las=2) + barplot(meanprofile, main="Catch profile of the sample", xlab="Species", ylab="Percentage of catch") + par(op) + 
mtext(paste(nrow(datSpecies)," logevents"), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + dev.off() + + + # Catch profile by cluster + nbSpec=ncol(sampleDatSpecies) + summarySampleClusters=array(0,dim=c(6,nbSpec,nbClust)) + dimnames(summarySampleClusters)[[1]]=c("Min.","1st Qu.","Median", "Mean", "3rd Qu.", "Max.") + dimnames(summarySampleClusters)[[2]]=names(meanprofile) + dimnames(summarySampleClusters)[[3]]=paste("Cluster",1:nbClust) + for(k in 1:nbClust){ + if(sizeClusters[k]==1){ + summarySampleClusters[,,k]=apply(t(as.matrix(sampleDatSpecies[which(sampleClusters==k),])),2, + function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) + }else{ + summarySampleClusters[,,k]=apply(sampleDatSpecies[which(sampleClusters==k),],2, + function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) + } + } + # Species names for catch profile plots + nameSpPlot=character() + catchMeanThreshold=2 + for(k in 1:nbClust){ + namSpi=names(meanprofile[which(t(summarySampleClusters["Mean",,k])>catchMeanThreshold)]) + numSpi=which(t(summarySampleClusters["Mean",,k])>catchMeanThreshold) + nameSpPloti=rep("",nbSpec) + nameSpPloti[numSpi]=namSpi + nameSpPlot=rbind(nameSpPlot,nameSpPloti) + } + png(paste(analysisName,numSample,"Sample_Mean profile by cluster of the sample.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(k in 1:nbClust){ + op2 <- par(las=2) + barplot(t(summarySampleClusters["Mean",,k]), names.arg=nameSpPlot[k,], xlab="Species", ylab="Percentage of catch", col="gray") + par(op2) + mtext(paste("Cluster",k), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Catch profile by cluster of the sample","\n","\n",sep="")) + dev.off() + + + # Standard deviation profile by 
cluster + sdprofil=matrix(0,nrow=nbClust,ncol=nbSpec) + namSdPlot=character() + SdThreshold=2 + for(k in 1:nbClust){ + if(length(which(sampleClusters==k))==1){ sdprofilclusti=rep(0,nbSpec) + }else{sdprofilclusti=apply(sampleDatSpecies[which(sampleClusters==k),],2,sd)} + namSDi=names(which(sdprofilclusti>SdThreshold)) + numSDi=which(sdprofilclusti>SdThreshold) + namSdPloti=rep("",nbSpec) + namSdPloti[numSDi]=namSDi + sdprofil[k,]=sdprofilclusti + namSdPlot=rbind(namSdPlot,namSdPloti) + } + rownames(sdprofil) <- 1:nrow(sdprofil) + png(paste(analysisName,numSample,"Sample_Standard deviation profile by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(k in 1:nbClust){ + op2 <- par(las=2) + barplot(sdprofil[k,], names.arg=namSdPlot[k,], xlab="Species", ylab="Percentage of catch") + par(op2) + mtext(paste("Cluster",k), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Standard deviation profile by cluster","\n","\n",sep="")) + dev.off() + + + # Number of Logevents by cluster + x=c(1:nbClust) + png(paste(analysisName,numSample,"Sample_Number of Logevents by cluster.png",sep="_"), width = 1200, height = 800) + coord=barplot(sizeClusters, names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents") + barplot(sizeClusters, names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents", col="skyblue") + text(coord,sizeClusters+100,sizeClusters,font=2,xpd=NA) + dev.off() + + + # Target Species profiles (test-value) + targetresval=numeric() + nameTargetPlot=character() + for(k in 1:nbClust){ + nomtargeti=as.character(target$tabnomespcib[k,which(!is.na(target$tabnumespcib[k,]))]) + numtargeti=as.numeric(target$tabnumespcib[k,which(!is.na(target$tabnumespcib[k,]))]) + nameTargetPloti=rep("",nbSpec) + nameTargetPloti[numtargeti]=nomtargeti + nameTargetPlot=rbind(nameTargetPlot,nameTargetPloti) + 
targetresvalclusti=rep(0,nbSpec) + targetresvalclusti[numtargeti]=resval[nomtargeti,k] + targetresval=rbind(targetresval,targetresvalclusti) + } + + png(paste(analysisName,numSample,"Sample_Profile of target species by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(k in 1:nbClust){ + op2 <- par(las=2) + barplot(targetresval[k,],names.arg=nameTargetPlot[k,], cex.names=1, xlab="Species", ylab="Test-value") + par(op2) + mtext(paste("Cluster",k), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Profile of target species by cluster","\n","\n",sep="")) + dev.off() + + Store(objects()) + gc(reset=TRUE) + + } # end of for(i in 1:5) + + + + + # Select the sample which gives the smaller classification's quality (the best sample) + sam=sampleList[which.max(classifVarExplain),] + outofsam=setdiff(1:nbLog,sam) + sampleDatLog=datLog[sam,] + + nbLogSample=nrow(sampleDatLog) + nbDim=ncol(sampleDatLog) + + + # HAC with the best sample + print("Final HAC") + log.hac=hcluster(sampleDatLog, method=param1, link=param2) + + + # Determine the number of cluster thanks to the scree-test + inerties.vector=log.hac$height[order(log.hac$height,decreasing=TRUE)] + nbClust=which(scree(inerties.vector)[,"epsilon"]<0)[3] + + # Cut the dendogram at the selected level + sampleClusters=cutree(log.hac,k=nbClust) + sampleClusters=as.factor(sampleClusters) + + sampleDatLogWithClusters=data.frame() + sampleDatLogWithClusters=cbind(sampleDatLog,sampleClusters) + sampleDatLogWithClusters=as.data.frame(sampleDatLogWithClusters) + + # Discriminante analysis on the learning dataset + learning=lda(sampleClusters~.,data=sampleDatLogWithClusters) + + otherLog=datLog[outofsam,] + otherLog=as.data.frame(otherLog) + + # Predict the cluster for the other logevent + pred=predict(learning,otherLog) + otherDatLogWithClusters=cbind(otherLog,pred$class) + 
colnames(otherDatLogWithClusters)=colnames(sampleDatLogWithClusters) + + # Rebuilt complete datLog with clusters + clusters=numeric(length=nbLog) + clusters[sam]=sampleClusters + clusters[outofsam]=pred$class + + + # Within and between variance of clusters and classification + centerOfGravityClassif=numeric() + withinVarClusters=numeric() + sizeClusti=numeric() + centerOfGravityDatLog=colMeans(datLog) + centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityDatLog) + for(k in 1:nbClust){ # Within variance by cluster + + clusti=datLog[which(clusters==k),1:nbDim] + if(length(which(clusters==k))==1) centerOfGravityClusti=clusti + else centerOfGravityClusti=colMeans(clusti) + centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityClusti) + sizeClusti[k]=length(which(clusters==k)) + if(length(which(clusters==k))==1) withinVarClusters[k]=0 + else withinVarClusters[k]=sum(apply(clusti,1,function(x) withinVar(x,centerOfGravityClusti))) + + } + # Between variance + classifBetweenVar=(1/nbLog)*sum(sizeClusti*((dist(centerOfGravityClassif)[1:nbClust])^2)) + # Within variance of clusters on totale variance (pourcent) and between variance on totale variance of classification + withinVarClusterOnTot=(1/nbLog)*sum(withinVarClusters)/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 + betweenVarClassifOnTot=classifBetweenVar/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 + + + # Calculation of each cluster size + n=nrow(datLog) + sizeClusters=numeric() + for(k in 1:nbClust){ + sizeClusters[k]=length(which(clusters==k)) + } + + # Compute the test-values for species + resval=test.values(clusters,datSpecies) + # Determine the target species + target=targetspecies(resval) + rownames(target$tabnomespcib)=paste("Cluster",1:nbClust) + + + # Compute the percentage of logevents catching each species by cluster + mainSpecies=colnames(datSpecies) + percLogevents=matrix(0,ncol=length(mainSpecies),nrow=nbClust,dimnames=list(paste("Cluster 
",1:nbClust,sep=""),mainSpecies)) + for(i in 1:nbClust){ + percLogevents[i,]=round(sapply(mainSpecies,function(x) (sizeClusters[i]-length(which(datSpecies[clusters==i,x]==0)))/sizeClusters[i]*100),digits=1) + } + + + # Projections on the first factorial plans + png(paste(analysisName,"Projections.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(2,3)) + plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="Projection of HAC classification on the factorial plan 1-2", xlab="axis 1", ylab="axis 2") + if(dim(datLog)[2]>2) { + plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="Projection of HAC classification on the factorial plan 2-3", xlab="axis 2", ylab="axis 3") + plot(datLog[,1], datLog[,3], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="Projection of HAC classification on the factorial plan 1-3", xlab="axis 1", ylab="axis 3") + if(dim(datLog)[2]>3) { + plot(datLog[,1], datLog[,4], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="Projection of HAC classification on the factorial plan 1-4", xlab="axis 1", ylab="axis 4") + plot(datLog[,2], datLog[,4], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="Projection of HAC classification on the factorial plan 2-4", xlab="axis 2", ylab="axis 4") + plot(datLog[,3], datLog[,4], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="Projection of HAC classification on the factorial plan 3-4", xlab="axis 3", ylab="axis 4") + }} + par(op) + dev.off() + + # For a paper + X11(5,5) + plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="", xlab="Axis 1", ylab="Axis 2") + abline(h=0, lty=2) ; abline(v=0, lty=2) + savePlot(filename=paste(analysisName,'projections_1_2_HAC',sep="_"), type='png') + dev.off() + + X11(5,5) + plot(datLog[,1], datLog[,2], pch=21, 
bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="", xlab="Axis 1", ylab="Axis 2") + abline(h=0, lty=2) ; abline(v=0, lty=2) + savePlot(filename=paste(analysisName,'projections_1_2_HAC',sep="_"), type='png') + dev.off() + + X11(5,5) + plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(sizeClusters))[as.numeric(clusters)], main="", xlab="Axis 2", ylab="Axis 3") + abline(h=0, lty=2) ; abline(v=0, lty=2) + savePlot(filename=paste(analysisName,'projections_2_3_HAC',sep="_"), type='png') + dev.off() + + + # Rectangles plotting + png(paste(analysisName,"Dendogram.png",sep="_"), width = 1200, height = 800) + plclust(log.hac,labels=FALSE,hang=-1,ann=FALSE) + title(main="HAC dendogram",xlab="Logevents",ylab="Height") + rect.hclust(log.hac, k=nbClust) + dev.off() + + + # Catch profile of the dataset + meanprofile=colMeans(datSpecies) + png(paste(analysisName,"Catch profile of the dataset.png",sep="_"), width = 1200, height = 800) + op <- par(las=2) + barplot(meanprofile, main="Catch profile of the dataset", xlab="Species", ylab="Percentage of catch") + par(op) + mtext(paste(nrow(datSpecies)," logevents"), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + dev.off() + + + # Catch profile by cluster + nbSpec=ncol(datSpecies) + summaryClusters=array(0,dim=c(6,nbSpec,nbClust)) + dimnames(summaryClusters)[[1]]=c("Min.","1st Qu.","Median", "Mean", "3rd Qu.", "Max.") + dimnames(summaryClusters)[[2]]=names(meanprofile) + dimnames(summaryClusters)[[3]]=paste("Cluster",1:nbClust) + for(i in 1:nbClust){ + if(sizeClusters[i]==1){ + summaryClusters[,,i]=apply(t(as.matrix(datSpecies[which(clusters==i),])),2, + function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) + }else{ + summaryClusters[,,i]=apply(datSpecies[which(clusters==i),],2, + function(x) 
rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) + } + } + # Species names for catch profile plots + nameSpPlot=character() + catchMeanThreshold=2 + for(i in 1:nbClust){ + namSpi=names(meanprofile[which(t(summaryClusters["Mean",,i])>catchMeanThreshold)]) + numSpi=which(t(summaryClusters["Mean",,i])>catchMeanThreshold) + nameSpPloti=rep("",nbSpec) + nameSpPloti[numSpi]=namSpi + nameSpPlot=rbind(nameSpPlot,nameSpPloti) + } + png(paste(analysisName,"Catch profile by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(i in 1:nbClust){ + op2 <- par(las=2) + barplot(t(summaryClusters["Mean",,i]), names.arg=nameSpPlot[i,], xlab="Species", ylab="Percentage of catch", col="gray") + par(op2) + mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Catch profile by cluster","\n","\n",sep="")) + dev.off() + + + # For a paper : levelplot + X11(4,4) + mat <- t(summaryClusters["Mean",,]) + #rownames(mat) <- c("I","II","III","IV","V","VI","VII","VIII","IX","X","XI","XII","XIII","XIV","XV", + # "XVI","XVII","XVIII","XIX","XX")[1:nrow(mat)] + rownames(mat) <- c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20")[1:nrow(mat)] + sp <- apply(mat,2, function(x) length(which(x>20))) + sp2=sp[which(sp>=1)] + #colnames(mat)[sp==0] <- "" + mat2=mat[,which(colnames(mat)%in%names(sp2))] + cc <- colorRampPalette(c("white", "black"),space = "rgb", interpolate="spline") + print(levelplot(mat2, cuts=4, aspect=1, xlab="", ylab="", col.regions=cc(5), at=c(0,20,40,60,80,100), scales=list(cex=0.7), colorkey=list(space="right", at=c(0,20,40,60,80,100), width=1.1))) + savePlot(filename=paste(analysisName,'mean_profile_by_cluster_levelplot',sep="_"), type='png') + dev.off() + + + # Standard deviation profile by cluster + 
sdprofil=matrix(0,nrow=nbClust,ncol=nbSpec) + namSdPlot=character() + SdThreshold=2 + for(i in 1:nbClust){ + if(length(which(clusters==i))==1){ sdprofilclusti=rep(0,nbSpec) + }else{sdprofilclusti=apply(datSpecies[which(clusters==i),],2,sd)} + namSDi=names(which(sdprofilclusti>SdThreshold)) + numSDi=which(sdprofilclusti>SdThreshold) + namSdPloti=rep("",nbSpec) + namSdPloti[numSDi]=namSDi + sdprofil[i,]=sdprofilclusti + namSdPlot=rbind(namSdPlot,namSdPloti) + } + rownames(sdprofil) <- 1:nrow(sdprofil) + png(paste(analysisName,"Standard deviation profile by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(i in 1:nbClust){ + op2 <- par(las=2) + barplot(sdprofil[i,], names.arg=namSdPlot[i,], xlab="Species", ylab="Percentage of catch") + par(op2) + mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Standard deviation profile by cluster","\n","\n",sep="")) + dev.off() + + + # Number of Logevents by cluster + x=c(1:nbClust) + png(paste(analysisName,"Number of Logevents by cluster.png",sep="_"), width = 1200, height = 800) + coord=barplot(sizeClusters, names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents") + barplot(sizeClusters, names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents", col="skyblue") + text(coord,sizeClusters+400,sizeClusters,font=2,xpd=NA) + dev.off() + + + # Profile of test-values by cluster + targetresval=numeric() + nameTargetPlot=character() + for(i in 1:nbClust){ + nomtargeti=as.character(target$tabnomespcib[i,which(!is.na(target$tabnumespcib[i,]))]) + numtargeti=as.numeric(target$tabnumespcib[i,which(!is.na(target$tabnumespcib[i,]))]) + nameTargetPloti=rep("",nbSpec) + nameTargetPloti[numtargeti]=nomtargeti + nameTargetPlot=rbind(nameTargetPlot,nameTargetPloti) + targetresvalclusti=rep(0,nbSpec) + 
targetresvalclusti[numtargeti]=resval[nomtargeti,i] + targetresval=rbind(targetresval,targetresvalclusti) + } + + png(paste(analysisName,"Profile of test-values by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(i in 1:nbClust){ + op2 <- par(las=2) + barplot(targetresval[i,],names.arg=nameTargetPlot[i,], cex.names=1, xlab="Species", ylab="Test-value") + par(op2) + mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Profile of test-values by cluster","\n","\n",sep="")) + dev.off() + + + + + # Descriptive tables of the clusters + clusterDesc=matrix(0,nrow=9,ncol=nbClust) + for(i in 1:nbClust){ + clusterDesc[,i]=c(i, + length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<50))+1, + length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<90))+1, + length(which(t(summaryClusters["Median",,i])>50)), + length(which(resval[,i]>1.96)), + length(which(resval[,i]>3.29)), + length(which(apply(datSpecies,2,function (x) (sizeClusters[i]-length(which(x[clusters==i]==0)))/sizeClusters[i]*100)>50)), + length(which(apply(datSpecies,2,function (x) (sizeClusters[i]-length(which(x[clusters==i]==0)))/sizeClusters[i]*100)>90)), + sizeClusters[i]) + } + rownames(clusterDesc)=c("Number of species", + "to have 50% of catch", "to have 90% of catch", + "with a median higher than 50", + "with a test-value > 1.96", "with a test-value > 3.29", + "catch in 50% of the logevents", "catch in 90% of the logevents", + "Clusters size") + colnames(clusterDesc)=1:nbClust + clusterDesc2=as.data.frame(clusterDesc) + + + # Summary tables of the clusters + namesSpecies=matrix(NA,nrow=nbClust,ncol=10) + namesCapt=matrix(NA,nrow=nbClust,ncol=5) + nbSpeciesCatch = min(5,dim(t(summaryClusters["Mean",,]))[[2]]) + namesTarget=matrix(NA,nrow=nbClust,ncol=5) + nbSpeciesVT = 
min(5,dim(target$tabnomespcib)[[2]]) + tabLibname=matrix(NA,nrow=nbClust,ncol=10) + listLibname=list() + + for(i in 1:nbClust){ + namesCapt[i,]=colnames(t(summaryClusters["Mean",,i]))[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)][1:nbSpeciesCatch] + a=as.data.frame(t(summaryClusters["Mean",target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])],i])) + colnames(a)= target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])] + if(length(a)!=0){ + namesTarget[i,1:length(target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])])]=colnames(a[order(a,decreasing=TRUE)]) + } + namesSpecies[i,1:length(union(namesCapt[i,],namesTarget[i,]))]=union(namesCapt[i,],namesTarget[i,]) + } + + for(i in 1:nbClust){ + listLibname[[i]]=lapply(as.list(namesSpecies[i,]), function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0) "NA" + else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"French_name"]) + tabLibname[i,]=unlist(lapply(listLibname[[i]], function(x) as.character(unlist(x)))) + } + + tabPropCatch=matrix(NA,nrow=nbClust,ncol=10) + tabTestVal=matrix(NA,nrow=nbClust,ncol=10) + tabPropLog=matrix(NA,nrow=nbClust,ncol=10) + + for(i in 1:nbClust){ + print("-----------------------------------------------------------------") + print(paste("Cluster",i)) + propCatch=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] + tabPropCatch[i,1:length(propCatch)]=propCatch + print(propCatch) + testVal=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) resval[x,i]),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] + tabTestVal[i,1:length(testVal)]=testVal + print(testVal) + 
propLog=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (sizeClusters[i]-length(which(datSpecies[clusters==i,x]==0)))/sizeClusters[i]*100),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (sizeClusters[i]-length(which(datSpecies[clusters==i,x]==0)))/sizeClusters[i]*100),digits=1)>=0.1)] + tabPropLog[i,1:length(propLog)]=propLog + print(propLog) + } + + tabClusters=array(0,dim=c(10,5,nbClust)) + dimnames(tabClusters)[[2]]=c("Libname","FAO","Test-value","% Catch","% Logevents") + dimnames(tabClusters)[[3]]=paste("Cluster",1:nbClust) + for(i in 1:nbClust){ + tabClusters[,,i]=cbind(tabLibname[i,],namesSpecies[i,],tabTestVal[i,],tabPropCatch[i,],tabPropLog[i,]) + } + + sizeTabClusters=numeric() + for(i in 1:nbClust){ + sizeTabClusters[i]=min(length(namesSpecies[i,!is.na(namesSpecies[i,])]),length(tabPropCatch[i,!is.na(tabPropCatch[i,])]),length(tabTestVal[i,!is.na(tabTestVal[i,])]),length(tabPropLog[i,!is.na(tabPropLog[i,])])) + } + + + # Target Species + # Intersection of species from tabClusters having : - % Cumulated Catch > thresholdCatch + # - Test-value > thresholdTestValue + # - % Logevents > thresholdLogevents + thresholdCatch=75 + thresholdTestValue=3 + thresholdLogevents=30 + + sppCumCatch=list() + sppTestValue=list() + sppLogevents=list() + targetSpeciesByCluster=list() + + for (i in 1:nbClust){ + percCatchCum=cumsum(as.numeric(tabClusters[,"% Catch",i])) + nbSpSel=length(which(percCatchCumthresholdTestValue),"FAO",i] + + sppLogevents[[i]]=tabClusters[which(as.numeric(tabClusters[,"% Logevents",i])>thresholdLogevents),"FAO",i] + + targetSpeciesByCluster[[i]]=intersect(sppCumCatch[[i]],sppTestValue[[i]]) + targetSpeciesByCluster[[i]]=intersect(targetSpeciesByCluster[[i]],sppLogevents[[i]]) + } + + # List of metiers (level 7) + listMetiersL7=list() + for (i in 1:nbClust){ + metiersClusteri=targetSpeciesByCluster[[i]] + metiersClusteri=as.character(unique(unlist(metiersClusteri))) + 
metiersClusteri=paste(unlist(strsplit(metiersClusteri," ")),collapse=" ") + listMetiersL7[[i]]=metiersClusteri + } + + # Metier (level 7) of each logevent + metierByLogeventL7=unlist(sapply(clusters,function(x) listMetiersL7[[x]])) + + + + # Create csv tables + write.table(clusterDesc2,file="descClusters.csv",col.names=FALSE,sep=";") + + dfClust=data.frame() + dfClust=paste("Clust ",1:nbClust,sep="") + for(i in 1:nbClust){ + write.table(dfClust[i],file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") + tabClusti=as.data.frame(tabClusters[1:sizeTabClusters[i],,i]) + write.table(tabClusti,file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") + } + + + LE_ID_clust=data.frame(LE_ID=LE_ID,clust=metierByLogeventL7) + print(" --- end of step 3 ---") + print(Sys.time()-t1) + + return(list(LE_ID_clust=LE_ID_clust, clusters=clusters, sizeClusters=sizeClusters, + betweenVarClassifOnTot=betweenVarClassifOnTot, mProfilSample=mProfilSample, + nbClust=nbClust, summaryClusters=summaryClusters, testValues=resval, + testValuesSpecies=target$tabnomespcib, percLogevents=percLogevents, + descClusters=clusterDesc2, tabClusters=tabClusters, + targetSpecies=targetSpeciesByCluster)) + + } else + + + + + +######################################################################################################################################## KMEANS + + if(methMetier=="kmeans"){ + # Calculation of optimal k thanks to within variance + nbLog=nrow(datLog) + + varintra=numeric() + for (k in 2:15){ + clustersKmeans=kmeans(datLog, k, iter.max=60, nstart=10) + varintra[k]=1/nbLog*sum(clustersKmeans$withinss) + } + nbClust=which(scree(varintra)[,"epsilon"]<0)[3] + + + png(paste(analysisName,"Within variance of the classification.png",sep="_"), width = 1200, height = 800) + plot(varintra,main="Within clusters variance",xlab="Number of clusters",ylab="Within Variance") + dev.off() + + Store(objects()) + gc(reset=TRUE) + + # KMEANS with k optimal + clusters=kmeans(datLog, nbClust, 
iter.max=60, nstart=10, algorithm="Hartigan-Wong") + + + # Within and between variance of clusters and classification + centerOfGravityClassif=numeric() + withinVarClusters=numeric() + sizeClusti=numeric() + centerOfGravityDatLog=colMeans(datLog) + centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityDatLog) + for(k in 1:nbClust){ # Within variance by cluster + + clusti=datLog[which(clusters$cluster==k),] + if(length(which(clusters$cluster==k))==1) centerOfGravityClusti=clusti + else centerOfGravityClusti=colMeans(clusti) + centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityClusti) + sizeClusti[k]=length(which(clusters$cluster==k)) + if(length(which(clusters$cluster==k))==1) withinVarClusters[k]=0 + else withinVarClusters[k]=sum(apply(clusti,1,function(x) withinVar(x,centerOfGravityClusti))) + + } + # Between variance + classifBetweenVar=(1/nbLog)*sum(sizeClusti*((dist(centerOfGravityClassif)[1:nbClust])^2)) + # Within variance of clusters on totale variance (pourcent) and between variance on totale variance of classification + withinVarClusterOnTot=(1/nbLog)*sum(withinVarClusters)/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 + betweenVarClassifOnTot=classifBetweenVar/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 + + + # Compute the test-values for species + resval=test.values(clusters$cluster,datSpecies) + # Determine the target species + target=targetspecies(resval) + rownames(target$tabnomespcib)=paste("Cluster",1:nbClust) + + + # Compute the percentage of logevents catching each species by cluster + mainSpecies=colnames(datSpecies) + percLogevents=matrix(0,ncol=length(mainSpecies),nrow=nbClust,dimnames=list(paste("Cluster ",1:nbClust,sep=""),mainSpecies)) + for(i in 1:nbClust){ + percLogevents[i,]=round(sapply(mainSpecies,function(x) (clusters$size[i]-length(which(datSpecies[clusters$cluster==i,x]==0)))/clusters$size[i]*100),digits=1) + } + + + # Projections on the first factorial plans + 
png(paste(analysisName,"Projections.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(2,3)) + plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="Projection of Kmeans classification on the factorial plan 1-2", xlab="axis 1", ylab="axis 2") + if(dim(datLog)[2]>2) { + plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="Projection of Kmeans classification on the factorial plan 2-3", xlab="axis 2", ylab="axis 3") + plot(datLog[,1], datLog[,3], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="Projection of Kmeans classification on the factorial plan 1-3", xlab="axis 1", ylab="axis 3") + if(dim(datLog)[2]>3) { + plot(datLog[,1], datLog[,4], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="Projection of Kmeans classification on the factorial plan 1-4", xlab="axis 1", ylab="axis 4") + plot(datLog[,2], datLog[,4], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="Projection of Kmeans classification on the factorial plan 2-4", xlab="axis 2", ylab="axis 4") + plot(datLog[,3], datLog[,4], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="Projection of Kmeans classification on the factorial plan 3-4", xlab="axis 3", ylab="axis 4") + }} + par(op) + dev.off() + + # For a paper + X11(5,5) + plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="", xlab="Axis 1", ylab="Axis 2") + abline(h=0, lty=2) ; abline(v=0, lty=2) + savePlot(filename=paste(analysisName,'projections_1_2_Kmeans',sep="_"), type='png') + dev.off() + + X11(5,5) + plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="", xlab="Axis 1", ylab="Axis 2") + abline(h=0, lty=2) ; abline(v=0, lty=2) + savePlot(filename=paste(analysisName,'projections_1_2_Kmeans',sep="_"), 
type='png') + dev.off() + + X11(5,5) + plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(clusters$size))[as.numeric(clusters$cluster)], main="", xlab="Axis 2", ylab="Axis 3") + abline(h=0, lty=2) ; abline(v=0, lty=2) + savePlot(filename=paste(analysisName,'projections_2_3_Kmeans',sep="_"), type='png') + dev.off() + + + # Catch profile of the dataset + meanprofile=colMeans(datSpecies) + png(paste(analysisName,"Catch profile of the dataset.png",sep="_"), width = 1200, height = 800) + op <- par(las=2) + barplot(meanprofile, main="Catch profile of the dataset", xlab="Species", ylab="Percentage of catch") + par(op) + mtext(paste(nrow(datSpecies)," logevents"), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + dev.off() + + + # Catch profile by cluster + nbSpec=ncol(datSpecies) + summaryClusters=array(0,dim=c(6,nbSpec,nbClust)) + dimnames(summaryClusters)[[1]]=c("Min.","1st Qu.","Median", "Mean", "3rd Qu.", "Max.") + dimnames(summaryClusters)[[2]]=names(meanprofile) + dimnames(summaryClusters)[[3]]=paste("Cluster",1:nbClust) + for(i in 1:nbClust){ + if(clusters$size[i]==1){ + summaryClusters[,,i]=apply(t(as.matrix(datSpecies[which(clusters$cluster==i),])),2, + function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) + }else{ + summaryClusters[,,i]=apply(datSpecies[which(clusters$cluster==i),],2, + function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) + } + } + # Species names for catch profile plots + nameSpPlot=character() + catchMeanThreshold=2 + for(i in 1:nbClust){ + namSpi=names(meanprofile[which(t(summaryClusters["Mean",,i])>catchMeanThreshold)]) + numSpi=which(t(summaryClusters["Mean",,i])>catchMeanThreshold) + nameSpPloti=rep("",nbSpec) + nameSpPloti[numSpi]=namSpi + nameSpPlot=rbind(nameSpPlot,nameSpPloti) + } + png(paste(analysisName,"Catch 
profile by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(i in 1:nbClust){ + op2 <- par(las=2) + barplot(t(summaryClusters["Mean",,i]), names.arg=nameSpPlot[i,], xlab="Species", ylab="Percentage of catch", col="gray") + par(op2) + mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Catch profile by cluster","\n","\n",sep="")) + dev.off() + + + # For a paper : levelplot + X11(4,4) + mat <- t(summaryClusters["Mean",,]) + #rownames(mat) <- c("I","II","III","IV","V","VI","VII","VIII","IX","X","XI","XII","XIII","XIV","XV", + # "XVI","XVII","XVIII","XIX","XX")[1:nrow(mat)] + rownames(mat) <- c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20")[1:nrow(mat)] + sp <- apply(mat,2, function(x) length(which(x>20))) + sp2=sp[which(sp>=1)] + #colnames(mat)[sp==0] <- "" + mat2=mat[,which(colnames(mat)%in%names(sp2))] + cc <- colorRampPalette(c("white", "black"),space = "rgb", interpolate="spline") + print(levelplot(mat2, cuts=4, aspect=1, xlab="", ylab="", col.regions=cc(5), at=c(0,20,40,60,80,100), scales=list(cex=0.7), colorkey=list(space="right", at=c(0,20,40,60,80,100), width=1.1))) + savePlot(filename=paste(analysisName,'mean_profile_by_cluster_levelplot',sep="_"), type='png') + dev.off() + + + # Standard deviation profile by cluster + sdprofil=matrix(0,nrow=nbClust,ncol=nbSpec) + namSdPlot=character() + SdThreshold=2 + for(i in 1:nbClust){ + if(length(which(clusters$cluster==i))==1){ sdprofilclusti=rep(0,nbSpec) + }else{sdprofilclusti=apply(datSpecies[which(clusters$cluster==i),],2,sd)} + namSDi=names(which(sdprofilclusti>SdThreshold)) + numSDi=which(sdprofilclusti>SdThreshold) + namSdPloti=rep("",nbSpec) + namSdPloti[numSDi]=namSDi + sdprofil[i,]=sdprofilclusti + namSdPlot=rbind(namSdPlot,namSdPloti) + } + rownames(sdprofil) <- 1:nrow(sdprofil) + png(paste(analysisName,"Standard deviation 
profile by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(i in 1:nbClust){ + op2 <- par(las=2) + barplot(sdprofil[i,], names.arg=namSdPlot[i,], xlab="Species", ylab="Percentage of catch") + par(op2) + mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Standard deviation profile by cluster","\n","\n",sep="")) + dev.off() + + + # Number of Logevents by cluster + x=c(1:nbClust) + png(paste(analysisName,"Number of Logevents by cluster.png",sep="_"), width = 1200, height = 800) + coord=barplot(clusters$size, names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents") + barplot(clusters$size, names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents", col="skyblue") + text(coord,clusters$size+400,clusters$size,font=2,xpd=NA) + dev.off() + + + # Profile of test-values by cluster + targetresval=numeric() + nameTargetPlot=character() + for(i in 1:nbClust){ + nomtargeti=as.character(target$tabnomespcib[i,which(!is.na(target$tabnumespcib[i,]))]) + numtargeti=as.numeric(target$tabnumespcib[i,which(!is.na(target$tabnumespcib[i,]))]) + nameTargetPloti=rep("",nbSpec) + nameTargetPloti[numtargeti]=nomtargeti + nameTargetPlot=rbind(nameTargetPlot,nameTargetPloti) + targetresvalclusti=rep(0,nbSpec) + targetresvalclusti[numtargeti]=resval[nomtargeti,i] + targetresval=rbind(targetresval,targetresvalclusti) + } + + png(paste(analysisName,"Profile of test-values by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(i in 1:nbClust){ + op2 <- par(las=2) + barplot(targetresval[i,], cex.names=1, names.arg=nameTargetPlot[i,], xlab="Species", ylab="Test-value") + par(op2) + mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Profile of test-values by 
cluster","\n","\n",sep="")) + dev.off() + + + + + # Descriptive tables of the clusters + clusterDesc=matrix(0,nrow=9,ncol=nbClust) + for(i in 1:nbClust){ + clusterDesc[,i]=c(i, + length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<50))+1, + length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<90))+1, + length(which(t(summaryClusters["Median",,i])>50)), + length(which(resval[,i]>1.96)), + length(which(resval[,i]>3.29)), + length(which(apply(datSpecies,2,function (x) (clusters$size[i]-length(which(x[clusters$cluster==i]==0)))/clusters$size[i]*100)>50)), + length(which(apply(datSpecies,2,function (x) (clusters$size[i]-length(which(x[clusters$cluster==i]==0)))/clusters$size[i]*100)>90)), + clusters$size[i]) + } + rownames(clusterDesc)=c("Number of species", + "to have 50% of catch", "to have 90% of catch", + "with a median higher than 50", + "with a test-value > 1.96", "with a test-value > 3.29", + "catch in 50% of the logevents", "catch in 90% of the logevents", + "Clusters size") + colnames(clusterDesc)=1:nbClust + clusterDesc2=as.data.frame(clusterDesc) + + + # Summary tables of the clusters + namesSpecies=matrix(NA,nrow=nbClust,ncol=10) + namesCapt=matrix(NA,nrow=nbClust,ncol=5) + nbSpeciesCatch = min(5,dim(t(summaryClusters["Mean",,]))[[2]]) + namesTarget=matrix(NA,nrow=nbClust,ncol=5) + nbSpeciesVT = min(5,dim(target$tabnomespcib)[[2]]) + tabLibname=matrix(NA,nrow=nbClust,ncol=10) + listLibname=list() + + for(i in 1:nbClust){ + namesCapt[i,]=colnames(t(summaryClusters["Mean",,i]))[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)][1:nbSpeciesCatch] + a=as.data.frame(t(summaryClusters["Mean",target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])],i])) + colnames(a)= target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])] + if(length(a)!=0){ + 
namesTarget[i,1:length(target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])])]=colnames(a[order(a,decreasing=TRUE)]) + } + namesSpecies[i,1:length(union(namesCapt[i,],namesTarget[i,]))]=union(namesCapt[i,],namesTarget[i,]) + } + + for(i in 1:nbClust){ + listLibname[[i]]=lapply(as.list(namesSpecies[i,]), function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0) "NA" + else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"French_name"]) + tabLibname[i,]=unlist(lapply(listLibname[[i]], function(x) as.character(unlist(x)))) + } + + tabPropCatch=matrix(NA,nrow=nbClust,ncol=10) + tabTestVal=matrix(NA,nrow=nbClust,ncol=10) + tabPropLog=matrix(NA,nrow=nbClust,ncol=10) + + for(i in 1:nbClust){ + print("-----------------------------------------------------------------") + print(paste("Cluster",i)) + propCatch=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] + tabPropCatch[i,1:length(propCatch)]=propCatch + print(propCatch) + testVal=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) resval[x,i]),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] + tabTestVal[i,1:length(testVal)]=testVal + print(testVal) + propLog=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (clusters$size[i]-length(which(datSpecies[clusters$cluster==i,x]==0)))/clusters$size[i]*100),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (clusters$size[i]-length(which(datSpecies[clusters$cluster==i,x]==0)))/clusters$size[i]*100),digits=1)>=0.1)] + tabPropLog[i,1:length(propLog)]=propLog + print(propLog) + } + + tabClusters=array(0,dim=c(10,5,nbClust)) + dimnames(tabClusters)[[2]]=c("Libname","FAO","Test-value","% Catch","% Logevents") 
+ dimnames(tabClusters)[[3]]=paste("Cluster",1:nbClust) + for(i in 1:nbClust){ + tabClusters[,,i]=cbind(tabLibname[i,],namesSpecies[i,],tabTestVal[i,],tabPropCatch[i,],tabPropLog[i,]) + } + + sizeTabClusters=numeric() + for(i in 1:nbClust){ + sizeTabClusters[i]=min(length(namesSpecies[i,!is.na(namesSpecies[i,])]),length(tabPropCatch[i,!is.na(tabPropCatch[i,])]),length(tabTestVal[i,!is.na(tabTestVal[i,])]),length(tabPropLog[i,!is.na(tabPropLog[i,])])) + } + + + # Target Species + # Intersection of species from tabClusters having : - % Cumulated Catch > thresholdCatch + # - Test-value > thresholdTestValue + # - % Logevents > thresholdLogevents + thresholdCatch=75 + thresholdTestValue=3 + thresholdLogevents=30 + + sppCumCatch=list() + sppTestValue=list() + sppLogevents=list() + targetSpeciesByCluster=list() + + for (i in 1:nbClust){ + percCatchCum=cumsum(as.numeric(tabClusters[,"% Catch",i])) + nbSpSel=length(which(percCatchCumthresholdTestValue),"FAO",i] + + sppLogevents[[i]]=tabClusters[which(as.numeric(tabClusters[,"% Logevents",i])>thresholdLogevents),"FAO",i] + + targetSpeciesByCluster[[i]]=intersect(sppCumCatch[[i]],sppTestValue[[i]]) + targetSpeciesByCluster[[i]]=intersect(targetSpeciesByCluster[[i]],sppLogevents[[i]]) + } + + # List of metiers (level 7) + listMetiersL7=list() + for (i in 1:nbClust){ + metiersClusteri=targetSpeciesByCluster[[i]] + metiersClusteri=as.character(unique(unlist(metiersClusteri))) + metiersClusteri=paste(unlist(strsplit(metiersClusteri," ")),collapse=" ") + listMetiersL7[[i]]=metiersClusteri + } + + # Metier (level 7) of each logevent + metierByLogeventL7=unlist(sapply(clusters$cluster,function(x) listMetiersL7[[x]])) + + + + # Create csv tables + write.table(clusterDesc2,file="descClusters.csv",col.names=FALSE,sep=";") + + dfClust=data.frame() + dfClust=paste("Clust ",1:nbClust,sep="") + for(i in 1:nbClust){ + write.table(dfClust[i],file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") + 
tabClusti=as.data.frame(tabClusters[1:sizeTabClusters[i],,i]) + write.table(tabClusti,file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") + } + + + LE_ID_clust=data.frame(LE_ID=LE_ID,clust=metierByLogeventL7) + print(" --- end of step 3 ---") + print(Sys.time()-t1) + + return(list(LE_ID_clust=LE_ID_clust, clusters=clusters, + betweenVarClassifOnTot=betweenVarClassifOnTot, nbClust=nbClust, + summaryClusters=summaryClusters, testValues=resval, + testValuesSpecies=target$tabnomespcib, percLogevents=percLogevents, + descClusters=clusterDesc2, tabClusters=tabClusters, + targetSpecies=targetSpeciesByCluster)) + + } else + + + + + +######################################################################################################################################## PAM + + if(methMetier=="pam"){ + + # Calculation of optimal k thanks to the silhouette (second maximum) + nbLog=nrow(datLog) + + clustersPam.silcoeff=numeric() + clustersPam.silcoeff[1]=0 + clustersPam.silcoeff[2]=0 + clustersPam.silcoeff[3]=0 + + k=2 + compMax=1 + repeat{ + k=k+2 + print(k) + clustersPam=pam(datLog,k) + clustersPam.silcoeff[k]=clustersPam$silinfo$avg.width + clustersPam=pam(datLog,k+1) + clustersPam.silcoeff[k+1]=clustersPam$silinfo$avg.width + if((clustersPam.silcoeff[k-2]clustersPam.silcoeff[k]) & compMax<=2){ + if(compMax==2){ + nbClust=k-1 + print(paste("2e max =",k-1)) + print(paste("nbClust =",nbClust)) + break + } else { + compMax=compMax+1 + print(paste("compMax1 =",compMax)) + print(paste("1er max =",k-1)) + } + } + if((clustersPam.silcoeff[k-1]clustersPam.silcoeff[k+1]) & compMax<=2){ + if(compMax==2){ + nbClust=k + print(paste("2e max =",k)) + print(paste("nbClust =",nbClust)) + break + } else { + compMax=compMax+1 + print(paste("compMax2 =",compMax)) + print(paste("1er max =",k)) + } + } + Store(objects()) + gc(reset=TRUE) + } + + png(paste(analysisName,"Silhouette of the classification.png",sep="_"), width = 1200, height = 800) + plot(clustersPam.silcoeff, 
main="Silhouette of the classification", xlab="Number of clusters", ylab="Silhouette") # k optimal corresponds to second maximum of silhouette's coefficients + dev.off() + + Store(objects()) + gc(reset=TRUE) + + cat("PamSilCoeff",clustersPam.silcoeff,"\n") + + # PAM with optimal k + clusters=pam(datLog,nbClust) + summary(clusters) + + + # Within and between variance of clusters and classification + centerOfGravityClassif=numeric() + withinVarClusters=numeric() + sizeClusti=numeric() + centerOfGravityDatLog=colMeans(datLog) + centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityDatLog) + for(k in 1:nbClust){ # Within variance by cluster + + clusti=datLog[which(clusters$clustering==k),] + if(length(which(clusters$clustering==k))==1) centerOfGravityClusti=clusti + else centerOfGravityClusti=colMeans(clusti) + centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityClusti) + sizeClusti[k]=length(which(clusters$clustering==k)) + if(length(which(clusters$clustering==k))==1) withinVarClusters[k]=0 + else withinVarClusters[k]=sum(apply(clusti,1,function(x) withinVar(x,centerOfGravityClusti))) + + } + # Between variance + classifBetweenVar=(1/nbLog)*sum(sizeClusti*((dist(centerOfGravityClassif)[1:nbClust])^2)) + # Within variance of clusters on totale variance (pourcent) and between variance on totale variance of classification + withinVarClusterOnTot=(1/nbLog)*sum(withinVarClusters)/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 + betweenVarClassifOnTot=classifBetweenVar/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 + + + # Compute the test-values for species + resval=test.values(clusters$clustering,datSpecies) + # Determine the target species + target=targetspecies(resval) + nbClust=length(clusters$id.med) + rownames(target$tabnomespcib)=paste("Cluster",1:nbClust) + + + # Compute the percentage of logevents catching each species by cluster + mainSpecies=colnames(datSpecies) + 
percLogevents=matrix(0,ncol=length(mainSpecies),nrow=nbClust,dimnames=list(paste("Cluster ",1:nbClust,sep=""),mainSpecies)) + for(i in 1:nbClust){ + percLogevents[i,]=round(sapply(mainSpecies,function(x) (clusters$clusinfo[i,1]-length(which(datSpecies[clusters$clustering==i,x]==0)))/clusters$clusinfo[i,1]*100),digits=1) + } + + + # Projections on the first factorial plans + png(paste(analysisName,"Projections.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(2,3)) + plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="Projection of PAM classification on the factorial plan 1-2", xlab="axis 1", ylab="axis 2") + if(dim(datLog)[2]>2) { + plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="Projection of PAM classification on the factorial plan 2-3", xlab="axis 2", ylab="axis 3") + plot(datLog[,1], datLog[,3], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="Projection of PAM classification on the factorial plan 1-3", xlab="axis 1", ylab="axis 3") + if(dim(datLog)[2]>3) { + plot(datLog[,1], datLog[,4], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="Projection of PAM classification on the factorial plan 1-4", xlab="axis 1", ylab="axis 4") + plot(datLog[,2], datLog[,4], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="Projection of PAM classification on the factorial plan 2-4", xlab="axis 2", ylab="axis 4") + plot(datLog[,3], datLog[,4], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="Projection of PAM classification on the factorial plan 3-4", xlab="axis 3", ylab="axis 4") + }} + par(op) + dev.off() + + + # For a paper + X11(5,5) + plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="", xlab="Axis 1", ylab="Axis 2") + abline(h=0, lty=2) ; 
abline(v=0, lty=2) + savePlot(filename=paste(analysisName,'projections_1_2_PAM',sep="_"), type='png') + dev.off() + + X11(5,5) + plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="", xlab="Axis 1", ylab="Axis 2") + abline(h=0, lty=2) ; abline(v=0, lty=2) + savePlot(filename=paste(analysisName,'projections_1_2_PAM',sep="_"), type='png') + dev.off() + + X11(5,5) + plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(clusters$id.med))[as.numeric(clusters$clustering)], main="", xlab="Axis 2", ylab="Axis 3") + abline(h=0, lty=2) ; abline(v=0, lty=2) + savePlot(filename=paste(analysisName,'projections_2_3_PAM',sep="_"), type='png') + dev.off() + + + # Catch profile of the dataset + meanprofile=colMeans(datSpecies) + png(paste(analysisName,"Catch profile of the dataset.png",sep="_"), width = 1200, height = 800) + op <- par(las=2) + barplot(meanprofile, main="Catch profile of the dataset", xlab="Species", ylab="Percentage of catch") + par(op) + mtext(paste(nrow(datSpecies)," logevents"), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + dev.off() + + + # Catch profile by cluster + nbSpec=ncol(datSpecies) + summaryClusters=array(0,dim=c(6,nbSpec,nbClust)) + dimnames(summaryClusters)[[1]]=c("Min.","1st Qu.","Median", "Mean", "3rd Qu.", "Max.") + dimnames(summaryClusters)[[2]]=names(meanprofile) + dimnames(summaryClusters)[[3]]=paste("Cluster",1:nbClust) + for(i in 1:nbClust){ + if(clusters$clusinfo[i,1]==1){ + summaryClusters[,,i]=apply(t(as.matrix(datSpecies[which(clusters$clustering==i),])),2, + function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) + }else{ + summaryClusters[,,i]=apply(datSpecies[which(clusters$clustering==i),],2, + function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) + } + } + 
# Species names for catch profile plots + nameSpPlot=character() + catchMeanThreshold=2 + for(i in 1:nbClust){ + namSpi=names(meanprofile[which(t(summaryClusters["Mean",,i])>catchMeanThreshold)]) + numSpi=which(t(summaryClusters["Mean",,i])>catchMeanThreshold) + nameSpPloti=rep("",nbSpec) + nameSpPloti[numSpi]=namSpi + nameSpPlot=rbind(nameSpPlot,nameSpPloti) + } + png(paste(analysisName,"Catch profile by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(i in 1:nbClust){ + op2 <- par(las=2) + barplot(t(summaryClusters["Mean",,i]), names.arg=nameSpPlot[i,], xlab="Species", ylab="Percentage of catch", col="gray") + par(op2) + mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Catch profile by cluster","\n","\n",sep="")) + dev.off() + + + # For a paper : levelplot + X11(4,4) + mat <- t(summaryClusters["Mean",,]) + #rownames(mat) <- c("I","II","III","IV","V","VI","VII","VIII","IX","X","XI","XII","XIII","XIV","XV", + # "XVI","XVII","XVIII","XIX","XX")[1:nrow(mat)] + rownames(mat) <- c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20")[1:nrow(mat)] + sp <- apply(mat,2, function(x) length(which(x>20))) + sp2=sp[which(sp>=1)] + #colnames(mat)[sp==0] <- "" + mat2=mat[,which(colnames(mat)%in%names(sp2))] + cc <- colorRampPalette(c("white", "black"),space = "rgb", interpolate="spline") + print(levelplot(mat2, cuts=4, aspect=1, xlab="", ylab="", col.regions=cc(5), at=c(0,20,40,60,80,100), scales=list(cex=0.7), colorkey=list(space="right", at=c(0,20,40,60,80,100), width=1.1))) + savePlot(filename=paste(analysisName,'mean_profile_by_cluster_levelplot',sep="_"), type='png') + dev.off() + + + # Standard deviation profile by cluster + sdprofil=matrix(0,nrow=nbClust,ncol=nbSpec) + namSdPlot=character() + SdThreshold=2 + for(i in 1:nbClust){ + if(length(which(clusters$clustering==i))==1){ 
sdprofilclusti=rep(0,nbSpec) + }else{sdprofilclusti=apply(datSpecies[which(clusters$clustering==i),],2,sd)} + namSDi=names(which(sdprofilclusti>SdThreshold)) + numSDi=which(sdprofilclusti>SdThreshold) + namSdPloti=rep("",nbSpec) + namSdPloti[numSDi]=namSDi + sdprofil[i,]=sdprofilclusti + namSdPlot=rbind(namSdPlot,namSdPloti) + } + rownames(sdprofil) <- 1:nrow(sdprofil) + png(paste(analysisName,"Standard deviation profile by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(i in 1:nbClust){ + op2 <- par(las=2) + barplot(sdprofil[i,], names.arg=namSdPlot[i,], xlab="Species", ylab="Percentage of catch") + par(op2) + mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Standard deviation profile by cluster","\n","\n",sep="")) + dev.off() + + + # Number of Logevents by cluster + x=c(1:nbClust) + png(paste(analysisName,"Number of Logevents by cluster.png",sep="_"), width = 1200, height = 800) + coord=barplot(clusters$clusinfo[,1], names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents") + barplot(clusters$clusinfo[,1], names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents", col="skyblue") + text(coord,clusters$clusinfo[,1]+200,clusters$clusinfo[,1],font=2,xpd=NA) + dev.off() + + + # Profile of test-values by cluster + targetresval=numeric() + nameTargetPlot=character() + for(i in 1:nbClust){ + nomtargeti=as.character(target$tabnomespcib[i,which(!is.na(target$tabnumespcib[i,]))]) + numtargeti=as.numeric(target$tabnumespcib[i,which(!is.na(target$tabnumespcib[i,]))]) + nameTargetPloti=rep("",nbSpec) + nameTargetPloti[numtargeti]=nomtargeti + nameTargetPlot=rbind(nameTargetPlot,nameTargetPloti) + targetresvalclusti=rep(0,nbSpec) + targetresvalclusti[numtargeti]=resval[nomtargeti,i] + targetresval=rbind(targetresval,targetresvalclusti) + } + + 
png(paste(analysisName,"Profile of test-values by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(i in 1:nbClust){ + op2 <- par(las=2) + barplot(targetresval[i,],names.arg=nameTargetPlot[i,], xlab="Species", ylab="Test-value") + par(op2) + mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Profile of test-values by cluster","\n","\n",sep="")) + dev.off() + + + + # Descriptive tables of the clusters + clusterDesc=matrix(0,nrow=9,ncol=nbClust) + for(i in 1:nbClust){ + clusterDesc[,i]=c(i, + length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<50))+1, + length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<90))+1, + length(which(t(summaryClusters["Median",,i])>50)), + length(which(resval[,i]>1.96)), + length(which(resval[,i]>3.29)), + length(which(apply(datSpecies,2,function (x) (clusters$clusinfo[i,1]-length(which(x[clusters$clustering==i]==0)))/clusters$clusinfo[i,1]*100)>50)), + length(which(apply(datSpecies,2,function (x) (clusters$clusinfo[i,1]-length(which(x[clusters$clustering==i]==0)))/clusters$clusinfo[i,1]*100)>90)), + clusters$clusinfo[i,1]) + } + rownames(clusterDesc)=c("Number of species", + "to have 50% of catch", "to have 90% of catch", + "with a median higher than 50", + "with a test-value > 1.96", "with a test-value > 3.29", + "catch in 50% of the logevents", "catch in 90% of the logevents", + "Clusters size") + colnames(clusterDesc)=1:nbClust + clusterDesc2=as.data.frame(clusterDesc) + + + # Summary tables of the clusters + namesSpecies=matrix(NA,nrow=nbClust,ncol=10) + namesCapt=matrix(NA,nrow=nbClust,ncol=5) + nbSpeciesCatch = min(5,dim(t(summaryClusters["Mean",,]))[[2]]) + namesTarget=matrix(NA,nrow=nbClust,ncol=5) + nbSpeciesVT = min(5,dim(target$tabnomespcib)[[2]]) + 
tabLibname=matrix(NA,nrow=nbClust,ncol=10) + listLibname=list() + + for(i in 1:nbClust){ + namesCapt[i,]=colnames(t(summaryClusters["Mean",,i]))[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)][1:nbSpeciesCatch] + a=as.data.frame(t(summaryClusters["Mean",target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])],i])) + colnames(a)= target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])] + if(length(a)!=0){ + namesTarget[i,1:length(target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])])]=colnames(a[order(a,decreasing=TRUE)]) + } + namesSpecies[i,1:length(union(namesCapt[i,],namesTarget[i,]))]=union(namesCapt[i,],namesTarget[i,]) + } + + for(i in 1:nbClust){ + listLibname[[i]]=lapply(as.list(namesSpecies[i,]), function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0) "NA" + else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"French_name"]) + tabLibname[i,]=unlist(lapply(listLibname[[i]], function(x) as.character(unlist(x)))) + } + + tabPropCatch=matrix(NA,nrow=nbClust,ncol=10) + tabTestVal=matrix(NA,nrow=nbClust,ncol=10) + tabPropLog=matrix(NA,nrow=nbClust,ncol=10) + + for(i in 1:nbClust){ + print("-----------------------------------------------------------------") + print(paste("Cluster",i)) + propCatch=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] + tabPropCatch[i,1:length(propCatch)]=propCatch + print(propCatch) + testVal=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) resval[x,i]),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] + tabTestVal[i,1:length(testVal)]=testVal + print(testVal) + propLog=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) 
(clusters$clusinfo[i,1]-length(which(datSpecies[clusters$clustering==i,x]==0)))/clusters$clusinfo[i,1]*100),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (clusters$clusinfo[i,1]-length(which(datSpecies[clusters$clustering==i,x]==0)))/clusters$clusinfo[i,1]*100),digits=1)>=0.1)] + tabPropLog[i,1:length(propLog)]=propLog + print(propLog) + } + + tabClusters=array(0,dim=c(10,5,nbClust)) + dimnames(tabClusters)[[2]]=c("Libname","FAO","Test-value","% Catch","% Logevents") + dimnames(tabClusters)[[3]]=paste("Cluster",1:nbClust) + for(i in 1:nbClust){ + tabClusters[,,i]=cbind(tabLibname[i,],namesSpecies[i,],tabTestVal[i,],tabPropCatch[i,],tabPropLog[i,]) + } + + sizeTabClusters=numeric() + for(i in 1:nbClust){ + sizeTabClusters[i]=min(length(namesSpecies[i,!is.na(namesSpecies[i,])]),length(tabPropCatch[i,!is.na(tabPropCatch[i,])]),length(tabTestVal[i,!is.na(tabTestVal[i,])]),length(tabPropLog[i,!is.na(tabPropLog[i,])])) + } + + + # Target Species + # Intersection of species from tabClusters having : - % Cumulated Catch > thresholdCatch + # - Test-value > thresholdTestValue + # - % Logevents > thresholdLogevents + thresholdCatch=75 + thresholdTestValue=3 + thresholdLogevents=30 + + sppCumCatch=list() + sppTestValue=list() + sppLogevents=list() + targetSpeciesByCluster=list() + + for (i in 1:nbClust){ + percCatchCum=cumsum(as.numeric(tabClusters[,"% Catch",i])) + nbSpSel=length(which(percCatchCumthresholdTestValue),"FAO",i] + + sppLogevents[[i]]=tabClusters[which(as.numeric(tabClusters[,"% Logevents",i])>thresholdLogevents),"FAO",i] + + targetSpeciesByCluster[[i]]=intersect(sppCumCatch[[i]],sppTestValue[[i]]) + targetSpeciesByCluster[[i]]=intersect(targetSpeciesByCluster[[i]],sppLogevents[[i]]) + } + + # List of metiers (level 7) + listMetiersL7=list() + for (i in 1:nbClust){ + metiersClusteri=targetSpeciesByCluster[[i]] + metiersClusteri=as.character(unique(unlist(metiersClusteri))) + 
metiersClusteri=paste(unlist(strsplit(metiersClusteri," ")),collapse=" ") + listMetiersL7[[i]]=metiersClusteri + } + + # Metier (level 7) of each logevent + metierByLogeventL7=unlist(sapply(clusters$clustering,function(x) listMetiersL7[[x]])) + + + # Create csv tables + write.table(clusterDesc2,file="descClusters.csv",col.names=FALSE,sep=";") + + dfClust=data.frame() + dfClust=paste("Clust ",1:nbClust,sep="") + for(i in 1:nbClust){ + write.table(dfClust[i],file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") + tabClusti=as.data.frame(tabClusters[1:sizeTabClusters[i],,i]) + write.table(tabClusti,file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") + } + + + LE_ID_clust=data.frame(LE_ID=LE_ID,clust=metierByLogeventL7) + print(" --- end of step 3 ---") + print(Sys.time()-t1) + + return(list(LE_ID_clust=LE_ID_clust, clusters=clusters, + betweenVarClassifOnTot=betweenVarClassifOnTot, nbClust=nbClust, + summaryClusters=summaryClusters, testValues=resval, + testValuesSpecies=target$tabnomespcib, percLogevents=percLogevents, + descClusters=clusterDesc2, tabClusters=tabClusters, + targetSpecies=targetSpeciesByCluster)) + + } else + + + + + +######################################################################################################################################## CLARA + + if(methMetier=="clara"){ + nbLog=nrow(datLog) + propSample=0.1 + + # Calculation of optimal k thanks to the silhouette (second maximum) + clustersClara.silcoeff=numeric() + clustersClara.silcoeff[1]=0 + clustersClara.silcoeff[2]=0 + clustersClara.silcoeff[3]=0 + k=2 + compMax=1 + repeat{ + k=k+2 + print(k) + clustersClara=clara(datLog, k, metric=param1, stand=FALSE, samples=5, sampsize=min(nbLog,round(propSample*nbLog+10*k))) + clustersClara.silcoeff[k]=clustersClara$silinfo$avg.width + clustersClara=clara(datLog, k+1, metric=param1, stand=FALSE, samples=5, sampsize=min(nbLog,round(propSample*nbLog+10*(k+1)))) + clustersClara.silcoeff[k+1]=clustersClara$silinfo$avg.width + 
if((clustersClara.silcoeff[k-2]clustersClara.silcoeff[k]) & compMax<=2){ + if(compMax==2){ + nbClust=k-1 + print(paste("2e max =",k-1)) + print(paste("nbClust =",nbClust)) + break + } else { + compMax=compMax+1 + print(paste("compMax1 =",compMax)) + print(paste("1er max =",k-1)) + } + } + if((clustersClara.silcoeff[k-1]clustersClara.silcoeff[k+1]) & compMax<=2){ + if(compMax==2){ + nbClust=k + print(paste("2e max =",k)) + print(paste("nbClust =",nbClust)) + break + } else { + compMax=compMax+1 + print(paste("compMax2 =",compMax)) + print(paste("1er max =",k)) + } + } + Store(objects()) + gc(reset=TRUE) + } + + + png(paste(analysisName,"Silhouette of the classification.png",sep="_"), width = 1200, height = 800) + plot(clustersClara.silcoeff, main="Silhouette of the classification", xlab="Number of clusters", ylab="Silhouette") # k optimal corresponds to maximum of silhouette's coefficients + dev.off() + + Store(objects()) + gc(reset=TRUE) + + cat("ClaraSilCoeff",clustersClara.silcoeff,"\n") + + + # CLARA with optimal k + clusters=clara(datLog, nbClust, metric=param1, stand=FALSE, samples=5, sampsize=min(nbLog,round(propSample*nbLog+10*nbClust))) # CLARA with optimal k + summary(clusters) + + + # Within and between variance of clusters and classification + centerOfGravityClassif=numeric() + withinVarClusters=numeric() + sizeClusti=numeric() + centerOfGravityDatLog=colMeans(datLog) + centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityDatLog) + for(k in 1:nbClust){ # Within variance by cluster + + clusti=datLog[which(clusters$clustering==k),] + if(length(which(clusters$clustering==k))==1) centerOfGravityClusti=clusti + else centerOfGravityClusti=colMeans(clusti) + centerOfGravityClassif=rbind(centerOfGravityClassif,centerOfGravityClusti) + sizeClusti[k]=length(which(clusters$clustering==k)) + if(length(which(clusters$clustering==k))==1) withinVarClusters[k]=0 + else withinVarClusters[k]=sum(apply(clusti,1,function(x) withinVar(x,centerOfGravityClusti))) 
+ + } + # Between variance + classifBetweenVar=(1/nbLog)*sum(sizeClusti*((dist(centerOfGravityClassif)[1:nbClust])^2)) + # Within variance of clusters on totale variance (pourcent) and between variance on totale variance of classification + withinVarClusterOnTot=(1/nbLog)*sum(withinVarClusters)/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 + betweenVarClassifOnTot=classifBetweenVar/(classifBetweenVar+(1/nbLog)*sum(withinVarClusters))*100 + + + # Compute the test-values for species + resval=test.values(clusters$cluster,datSpecies) + # Determine the target species + target=targetspecies(resval) + rownames(target$tabnomespcib)=paste("Cluster",1:nbClust) + + + # Compute the percentage of logevents catching each species by cluster + mainSpecies=colnames(datSpecies) + percLogevents=matrix(0,ncol=length(mainSpecies),nrow=nbClust,dimnames=list(paste("Cluster ",1:nbClust,sep=""),mainSpecies)) + for(i in 1:nbClust){ + percLogevents[i,]=round(sapply(mainSpecies,function(x) (clusters$clusinfo[i,1]-length(which(datSpecies[clusters$clustering==i,x]==0)))/clusters$clusinfo[i,1]*100),digits=1) + } + + + # Projections on the first factorial plans + png(paste(analysisName,"Projections.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(2,3)) + plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="Projection of CLARA classification on the factorial plan 1-2", xlab="axis 1", ylab="axis 2") + if(dim(datLog)[2]>2) { + plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="Projection of CLARA classification on the factorial plan 2-3", xlab="axis 2", ylab="axis 3") + plot(datLog[,1], datLog[,3], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="Projection of CLARA classification on the factorial plan 1-3", xlab="axis 1", ylab="axis 3") + if(dim(datLog)[2]>3) { + plot(datLog[,1], datLog[,4], pch=21, 
bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="Projection of CLARA classification on the factorial plan 1-4", xlab="axis 1", ylab="axis 4") + plot(datLog[,2], datLog[,4], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="Projection of CLARA classification on the factorial plan 2-4", xlab="axis 2", ylab="axis 4") + plot(datLog[,3], datLog[,4], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="Projection of CLARA classification on the factorial plan 3-4", xlab="axis 3", ylab="axis 4") + }} + par(op) + dev.off() + + + # For a paper + X11(5,5) + plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="", xlab="Axis 1", ylab="Axis 2") + abline(h=0, lty=2) ; abline(v=0, lty=2) + savePlot(filename=paste(analysisName,'projections_1_2_CLARA',sep="_"), type='png') + dev.off() + + X11(5,5) + plot(datLog[,1], datLog[,2], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="", xlab="Axis 1", ylab="Axis 2") + abline(h=0, lty=2) ; abline(v=0, lty=2) + savePlot(filename=paste(analysisName,'projections_1_2_CLARA',sep="_"), type='png') + dev.off() + + X11(5,5) + plot(datLog[,2], datLog[,3], pch=21, bg=rainbow(length(clusters$i.med))[as.numeric(clusters$clustering)], main="", xlab="Axis 2", ylab="Axis 3") + abline(h=0, lty=2) ; abline(v=0, lty=2) + savePlot(filename=paste(analysisName,'projections_2_3_CLARA',sep="_"), type='png') + dev.off() + + # Catch profile of the dataset + meanprofile=colMeans(datSpecies) + png(paste(analysisName,"Catch profile of the dataset.png",sep="_"), width = 1200, height = 800) + op <- par(las=2) + barplot(meanprofile, main="Catch profile of the dataset", xlab="Species", ylab="Percentage of catch") + par(op) + mtext(paste(nrow(datSpecies)," logevents"), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + dev.off() + + + # Catch profile by cluster + nbSpec=ncol(datSpecies) + 
summaryClusters=array(0,dim=c(6,nbSpec,nbClust)) + dimnames(summaryClusters)[[1]]=c("Min.","1st Qu.","Median", "Mean", "3rd Qu.", "Max.") + dimnames(summaryClusters)[[2]]=names(meanprofile) + dimnames(summaryClusters)[[3]]=paste("Cluster",1:nbClust) + for(i in 1:nbClust){ + if(clusters$clusinfo[i,1]==1){ + summaryClusters[,,i]=apply(t(as.matrix(datSpecies[which(clusters$clustering==i),])),2, + function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) + }else{ + summaryClusters[,,i]=apply(datSpecies[which(clusters$clustering==i),],2, + function(x) rbind(min(as.vector(x)),quantile(as.vector(x),0.25),quantile(as.vector(x),0.50),mean(as.vector(x)),quantile(as.vector(x),0.75),max(as.vector(x)))) + } + } + # Species names for catch profile plots + nameSpPlot=character() + catchMeanThreshold=2 + for(i in 1:nbClust){ + namSpi=names(meanprofile[which(t(summaryClusters["Mean",,i])>catchMeanThreshold)]) + numSpi=which(t(summaryClusters["Mean",,i])>catchMeanThreshold) + nameSpPloti=rep("",nbSpec) + nameSpPloti[numSpi]=namSpi + nameSpPlot=rbind(nameSpPlot,nameSpPloti) + } + # Plot + png(paste(analysisName,"Catch profile by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(i in 1:nbClust){ + op2 <- par(las=2) + barplot(t(summaryClusters["Mean",,i]), names.arg=nameSpPlot[i,], xlab="Species", ylab="Percentage of catch", col="gray") + par(op2) + mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Catch profile by cluster","\n","\n",sep="")) + dev.off() + + + # For a paper : levelplot + X11(4,4) + mat <- t(summaryClusters["Mean",,]) + #rownames(mat) <- c("I","II","III","IV","V","VI","VII","VIII","IX","X","XI","XII","XIII","XIV","XV", + # "XVI","XVII","XVIII","XIX","XX")[1:nrow(mat)] + rownames(mat) <- 
c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20")[1:nrow(mat)] + sp <- apply(mat,2, function(x) length(which(x>20))) + sp2=sp[which(sp>=1)] + #colnames(mat)[sp==0] <- "" + mat2=mat[,which(colnames(mat)%in%names(sp2))] + cc <- colorRampPalette(c("white", "black"),space = "rgb", interpolate="spline") + print(levelplot(mat2, cuts=4, aspect=1, xlab="", ylab="", col.regions=cc(5), at=c(0,20,40,60,80,100), scales=list(cex=0.7), colorkey=list(space="right", at=c(0,20,40,60,80,100), width=1.1))) + savePlot(filename=paste(analysisName,'mean_profile_by_cluster_levelplot',sep="_"), type='png') + dev.off() + + # OR # + mat <- t(summaryClusters["Mean",,]) + #mat=mat[,order(colnames(mat),decreasing=TRUE)] # si on veut mettre les especes par ordre descendant + rownames(mat) <- c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20")[1:nrow(mat)] + sp <- apply(mat,2, function(x) length(which(x>20))) + colnames(mat)[sp==0] <- "" + cc <- colorRampPalette(c("white", "steelblue2", "blue4"),space = "rgb", interpolate="spline") + png(filename = paste(paste(analysisName,'mean_profile_by_cluster_levelplot_blue',sep="_"),".png",sep=""), width = 400, height = 800) + print(levelplot(mat, cuts=4, aspect=3, xlab="", ylab="", col.regions=cc(5), at=c(0,20,40,60,80,100), scales=list(cex=0.8), colorkey=list(space="right", at=c(0,20,40,60,80,100)))) + dev.off() + + + # Standard deviation profile by cluster + sdprofil=matrix(0,nrow=nbClust,ncol=nbSpec) + namSdPlot=character() + SdThreshold=5 + for(i in 1:nbClust){ + if(length(which(clusters$clustering==i))==1){ sdprofilclusti=rep(0,nbSpec) + }else{sdprofilclusti=apply(datSpecies[which(clusters$clustering==i),],2,sd)} + namSDi=names(which(sdprofilclusti>SdThreshold)) + numSDi=which(sdprofilclusti>SdThreshold) + namSdPloti=rep("",nbSpec) + namSdPloti[numSDi]=namSDi + sdprofil[i,]=sdprofilclusti + namSdPlot=rbind(namSdPlot,namSdPloti) + } + rownames(sdprofil) <- 
1:nrow(sdprofil) + png(paste(analysisName,"Standard deviation profile by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(i in 1:nbClust){ + op2 <- par(las=2) + barplot(sdprofil[i,], names.arg=namSdPlot[i,], xlab="Species", ylab="Percentage of catch") + par(op2) + mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + title(main=paste("Standard deviation profile by cluster","\n","\n",sep="")) + dev.off() + + + # Number of Logevents by cluster + x=c(1:nbClust) + png(paste(analysisName,"Number of Logevents by cluster.png",sep="_"), width = 1200, height = 800) + coord=barplot(clusters$clusinfo[,1], names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents") + barplot(clusters$clusinfo[,1], names.arg=x, main="Number of Logevents by cluster", xlab="Cluster", ylab="Number of Logevents", col="skyblue") + text(coord,clusters$clusinfo[,1]+5,clusters$clusinfo[,1],font=2,xpd=NA) + dev.off() + + + # Profile of test-values by cluster + targetresval=matrix(0,nrow=nbClust,ncol=nbSpec) + colnames(targetresval)=colnames(datSpecies) + rownames(targetresval)=1:nbClust + nameTargetPlot=matrix(NA,nrow=nbClust,ncol=nbSpec) + for(i in 1:nbClust){ + nomtargeti=as.character(target$tabnomespcib[i,which(!is.na(target$tabnumespcib[i,]))]) + numtargeti=as.numeric(target$tabnumespcib[i,which(!is.na(target$tabnumespcib[i,]))]) + nameTargetPlot[i,numtargeti]=nomtargeti + targetresval[i,numtargeti]=resval[nomtargeti,i] + } + + png(paste(analysisName,"Profile of test-values by cluster.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(ceiling(sqrt(nbClust)),round(sqrt(nbClust)))) + for(i in 1:nbClust){ + op2 <- par(las=2) + barplot(targetresval[i,],names.arg=nameTargetPlot[i,], xlab="Species", ylab="Test-value") + par(op2) + mtext(paste("Cluster",i), side=3, outer=FALSE, adj=0.5, line=0.5, col="darkblue") + } + par(op) + 
title(main=paste("Profile of test-values by cluster","\n","\n",sep="")) + dev.off() + + + + # Descriptive tables of the clusters + clusterDesc=matrix(0,nrow=9,ncol=nbClust) + for(i in 1:nbClust){ + clusterDesc[,i]=c(i, + length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<50))+1, + length(which(cumsum(t(summaryClusters["Mean",,i])[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)])<90))+1, + length(which(t(summaryClusters["Median",,i])>50)), + length(which(resval[,i]>1.96)), + length(which(resval[,i]>3.29)), + length(which(apply(datSpecies,2,function (x) (clusters$clusinfo[i,1]-length(which(x[clusters$clustering==i]==0)))/clusters$clusinfo[i,1]*100)>50)), + length(which(apply(datSpecies,2,function (x) (clusters$clusinfo[i,1]-length(which(x[clusters$clustering==i]==0)))/clusters$clusinfo[i,1]*100)>90)), + clusters$clusinfo[i,1]) + } + rownames(clusterDesc)=c("Number of species", + "to have 50% of catch", "to have 90% of catch", + "with a median higher than 50", + "with a test-value > 1.96", "with a test-value > 3.29", + "catch in 50% of the logevents", "catch in 90% of the logevents", + "Clusters size") + colnames(clusterDesc)=1:nbClust + clusterDesc2=as.data.frame(clusterDesc) + + + # Summary tables of the clusters + namesSpecies=matrix(NA,nrow=nbClust,ncol=10) + namesCapt=matrix(NA,nrow=nbClust,ncol=5) + nbSpeciesCatch = min(5,dim(t(summaryClusters["Mean",,]))[[2]]) + namesTarget=matrix(NA,nrow=nbClust,ncol=5) + nbSpeciesVT = min(5,dim(target$tabnomespcib)[[2]]) + tabLibname=matrix(NA,nrow=nbClust,ncol=10) + listLibname=list() + + for(i in 1:nbClust){ + namesCapt[i,]=colnames(t(summaryClusters["Mean",,i]))[order(t(summaryClusters["Mean",,i]),decreasing=TRUE)][1:nbSpeciesCatch] + a=as.data.frame(t(summaryClusters["Mean",target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])],i])) + colnames(a)= target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])] + 
if(length(a)!=0){ + namesTarget[i,1:length(target$tabnomespcib[i,1:nbSpeciesVT][!is.na(target$tabnomespcib[i,1:nbSpeciesVT])])]=colnames(a[order(a,decreasing=TRUE)]) + } + namesSpecies[i,1:length(union(namesCapt[i,],namesTarget[i,]))]=union(namesCapt[i,],namesTarget[i,]) + } + + for(i in 1:nbClust){ + listLibname[[i]]=lapply(as.list(namesSpecies[i,]), function(x) if(length(which(correspLevel7to5[,"X3A_CODE"]==x))==0) "NA" + else correspLevel7to5[which(correspLevel7to5[,"X3A_CODE"]==x),"French_name"]) + tabLibname[i,]=unlist(lapply(listLibname[[i]], function(x) as.character(unlist(x)))) + } + + tabPropCatch=matrix(NA,nrow=nbClust,ncol=10) + tabTestVal=matrix(NA,nrow=nbClust,ncol=10) + tabPropLog=matrix(NA,nrow=nbClust,ncol=10) + + for(i in 1:nbClust){ + print("-----------------------------------------------------------------") + print(paste("Cluster",i)) + propCatch=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] + tabPropCatch[i,1:length(propCatch)]=propCatch + print(propCatch) + testVal=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) resval[x,i]),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) t(summaryClusters["Mean",x,i])),digits=1)>=0.1)] + tabTestVal[i,1:length(testVal)]=testVal + print(testVal) + propLog=round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (clusters$clusinfo[i,1]-length(which(datSpecies[clusters$clustering==i,x]==0)))/clusters$clusinfo[i,1]*100),digits=1)[which(round(sapply(namesSpecies[i,][!is.na(namesSpecies[i,])],function(x) (clusters$clusinfo[i,1]-length(which(datSpecies[clusters$clustering==i,x]==0)))/clusters$clusinfo[i,1]*100),digits=1)>=0.1)] + tabPropLog[i,1:length(propLog)]=propLog + print(propLog) + } + + tabClusters=array(0,dim=c(10,5,nbClust)) + 
dimnames(tabClusters)[[2]]=c("Libname","FAO","Test-value","% Catch","% Logevents") + dimnames(tabClusters)[[3]]=paste("Cluster",1:nbClust) + for(i in 1:nbClust){ + tabClusters[,,i]=cbind(tabLibname[i,],namesSpecies[i,],tabTestVal[i,],tabPropCatch[i,],tabPropLog[i,]) + } + + sizeTabClusters=numeric() + for(i in 1:nbClust){ + sizeTabClusters[i]=min(length(namesSpecies[i,!is.na(namesSpecies[i,])]),length(tabPropCatch[i,!is.na(tabPropCatch[i,])]),length(tabTestVal[i,!is.na(tabTestVal[i,])]),length(tabPropLog[i,!is.na(tabPropLog[i,])])) + } + + + # Target Species + # Intersection of species from tabClusters having : - % Cumulated Catch > thresholdCatch + # - Test-value > thresholdTestValue + # - % Logevents > thresholdLogevents + thresholdCatch=75 + thresholdTestValue=3 + thresholdLogevents=30 + + sppCumCatch=list() + sppTestValue=list() + sppLogevents=list() + targetSpeciesByCluster=list() + + for (i in 1:nbClust){ + percCatchCum=cumsum(as.numeric(tabClusters[,"% Catch",i])) + nbSpSel=length(which(percCatchCumthresholdTestValue),"FAO",i] + + sppLogevents[[i]]=tabClusters[which(as.numeric(tabClusters[,"% Logevents",i])>thresholdLogevents),"FAO",i] + + targetSpeciesByCluster[[i]]=intersect(sppCumCatch[[i]],sppTestValue[[i]]) + targetSpeciesByCluster[[i]]=intersect(targetSpeciesByCluster[[i]],sppLogevents[[i]]) + } + + # List of metiers (level 7) + listMetiersL7=list() + for (i in 1:nbClust){ + metiersClusteri=targetSpeciesByCluster[[i]] + metiersClusteri=as.character(unique(unlist(metiersClusteri))) + metiersClusteri=paste(unlist(strsplit(metiersClusteri," ")),collapse=" ") + listMetiersL7[[i]]=metiersClusteri + } + + # Metier (level 7) of each logevent + metierByLogeventL7=unlist(sapply(clusters$clustering,function(x) listMetiersL7[[x]])) + + + + # Create csv tables + write.table(clusterDesc2,file="descClusters.csv",col.names=FALSE,sep=";") + + dfClust=data.frame() + dfClust=paste("Clust ",1:nbClust,sep="") + for(i in 1:nbClust){ + 
write.table(dfClust[i],file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") + tabClusti=as.data.frame(tabClusters[1:sizeTabClusters[i],,i]) + write.table(tabClusti,file="tabClusters.csv",append=TRUE,row.names=FALSE,sep=";") + } + + + LE_ID_clust=data.frame(LE_ID=LE_ID,clust=metierByLogeventL7) + print(" --- end of step 3 ---") + print(Sys.time()-t1) + + return(list(LE_ID_clust=LE_ID_clust, clusters=clusters, + betweenVarClassifOnTot=betweenVarClassifOnTot, nbClust=nbClust, + summaryClusters=summaryClusters, testValues=resval, + testValuesSpecies=target$tabnomespcib, percLogevents=percLogevents, + descClusters=clusterDesc2, tabClusters=tabClusters, + targetSpecies=targetSpeciesByCluster)) + + } else stop("methMetier must be hac, kmeans, pam or clara") + # end of the methods + + +} # end of the function "getMetierClusters" + diff --git a/vmstools/R/getTableAfterPCA.r b/vmstools/R/getTableAfterPCA.r index 16ad140..1be5a3f 100644 --- a/vmstools/R/getTableAfterPCA.r +++ b/vmstools/R/getTableAfterPCA.r @@ -1,129 +1,222 @@ -################################################################################ -# STEP 2 OF THE MULTIVARIATE CLASSIFICATION : # -# RUN A PCA ON THE DATASET FROM STEP 1 # -# (2 CRITERIA : 70PERCENTS AND SCREETEST ARE AVAILABLE) # -# IT'S POSSIBLE TO KEEP THE DATASET FROM STEP 1 BY CHOOSING "NOPCA" # -################################################################################ - - -getTableAfterPCA = function(datSpecies,analysisName="",pcaYesNo="pca",criterion="70percents"){ - - LE_ID <- rownames(datSpecies) - NbSpecies <- dim(datSpecies)[2] - datSpecies <- as.matrix(datSpecies,ncol=NbSpecies,nrow=length(LE_ID)) - - print("######## STEP 2 PCA/NO PCA ON CATCH PROFILES ########") - - t1 <- Sys.time() - print(paste("--- Selected method :",pcaYesNo, "---")) - - - if(pcaYesNo=="pca"){ - - print("Running PCA on all axes...") - - # PCA (Principal Component Analysis) - log.pca = PCA(datSpecies, graph=FALSE, ncp=ncol(datSpecies)) - - X11(5,5) - 
plot.PCA(log.pca, choix = "var", axes = 1:2, new.plot=FALSE, title="", lim.cos2.var = 0.1) - savePlot(filename=paste(analysisName,'species_projection_on_the_1_and_2_factorial_axis',sep="_"), type='png') - dev.off() - - X11(5,5) - plot.PCA(log.pca, choix = "var", axes = 1:2, new.plot=FALSE, title="", lim.cos2.var = 0.1) - savePlot(filename=paste(analysisName,'species_projection_on_the_1_and_2_factorial_axis',sep="_"), type='png') - dev.off() - - X11(5,5) - plot.PCA(log.pca, choix = "var", axes = 2:3, new.plot=FALSE, title="", lim.cos2.var = 0.1) - savePlot(filename=paste(analysisName,'species_projection_on_the_2_and_3_factorial_axis',sep="_"), type='png') - dev.off() - - X11(5,5) - plot.PCA(log.pca, choix = "ind", axes = 1:2, habillage = "ind", title="", new.plot=FALSE, cex=1.1) - savePlot(filename=paste(analysisName,'projection_of_individuals_on_the_first_two_factorial_axis',sep="_"), type='png') - dev.off() - - - # Determine the number of axis to keep - if(criterion=="70percents"){ - nbaxes=which(log.pca$eig[,3]>70)[1] # we are taking the axis until having 70% of total inertia - cat("--- number of axes:",nbaxes,"\n") - cat("--- percentage inertia explained:",log.pca$eig[nbaxes,3],"\n") - } else - # OR - if(criterion=="screetest"){ - nbaxes=which(scree(log.pca$eig[,1])[,"epsilon"]<0)[2] # thanks to the scree-test - cat("--- number of axes:",nbaxes,"\n") - cat("--- percentage inertia explained:",log.pca$eig[nbaxes,3],"\n") - } else stop("Criterion for PCA must be 70percents or screetest") - - - # Eigenvalues and relative graphics - log.pca$eig - - png(paste(analysisName,"Eigen values.png",sep="_"), width = 1200, height = 800) - x=1:length(log.pca$eig[,1]) - barplot(log.pca$eig[,1],names.arg=x, main="Eigen values") - dev.off() - - png(paste(analysisName,"Percentage of Inertia.png",sep="_"), width = 1200, height = 800) - color=rep("grey",length(log.pca$eig[,1])) - if(criterion=="screetest") color[1:nbaxes]="green" - barplot(log.pca$eig[,2],names.arg=x, col=color, 
main="Percentage of Inertia of factorial axis", xlab="Axis", ylab="% of Inertia") - dev.off() - - X11(5,5) - op <- par(no.readonly = TRUE) - par(mar=c(4,4,1,1)) - color=rep("grey",length(log.pca$eig[,1])) - if(criterion=="70percents") color[1:nbaxes]="green" - barplot(log.pca$eig[,3], col=color, ylab="", xlab="", axes=FALSE, cex.names=2) - axis(2, las=2) - abline(h=70, col="red") - text(1,72, "70% of Inertia", col = "red", adj = c(0, -.1)) - mtext("Axes", side=1, adj=0.5, outer=FALSE, line=+1, font=1, cex=1.5) - mtext("% of Inertia", side=2, adj=0.5, outer=FALSE, line=+2.5, font=1, cex=1.5) - savePlot(filename = paste(analysisName,"Cumulative Percentage of Inertia.png",sep="_"),type ="png") - par(op) - dev.off() - - - # Projection of variables "species" on the first factorial axis - png(paste(analysisName,"Projection of Species on first factorial axis.png",sep="_"), width = 1200, height = 800) - op <- par(mfrow=c(2,3)) - plot(log.pca,choix="var",axes = c(1, 2),new.plot=FALSE,lim.cos2.var = 0.3) - plot(log.pca,choix="var",axes = c(2, 3),new.plot=FALSE,lim.cos2.var = 0.3) - plot(log.pca,choix="var",axes = c(1, 3),new.plot=FALSE,lim.cos2.var = 0.3) - plot(log.pca,choix="var",axes = c(1, 4),new.plot=FALSE,lim.cos2.var = 0.3) - plot(log.pca,choix="var",axes = c(2, 4),new.plot=FALSE,lim.cos2.var = 0.3) - #plot(log.pca,choix="var",axes = c(3, 4),new.plot=FALSE,lim.cos2.var = 0.3) - par(op) - title(main=paste("Projection of Species on first factorial axis","\n","\n",sep="")) - dev.off() - - - # PCA with the good number of axis - print("Retaining Principal Components of selected axes...") - log.pca=log.pca$ind$coord[,1:nbaxes] - datLog=round(log.pca,4) - - - } else - - - if(pcaYesNo=="nopca"){ - datLog=datSpecies - } else stop("pcaYesNo must be pca or nopca") - - Store(objects()) - gc(reset=TRUE) - - print(" --- end of step 2 ---") - print(Sys.time()-t1) - - return(datLog) - -} - +################################################################################ +# STEP 2 OF 
THE MULTIVARIATE CLASSIFICATION : # +# RUN A PCA ON THE DATASET FROM STEP 1 # +# (2 CRITERIA : 70PERCENTS AND SCREETEST ARE AVAILABLE) # +# IT'S POSSIBLE TO KEEP THE DATASET FROM STEP 1 BY CHOOSING "NOPCA" # +################################################################################ + + + + +#' Finding Metiers from a reduced EFLALO dataset, step 2: Options for running a +#' PCA on the selected species. +#' +#' This function represents the second step in the multivariate analysis of +#' logbooks data for identifying metiers. +#' +#' This step of the classification is a user-driven choice of running or not a +#' Principal Component Analysis (PCA) on the catch per logevents of the species +#' retained, expressed in percentage. A PCA will help reducing the +#' multi-dimensional catch matrix to a smaller number of informative components +#' represented by the first n- axes of the PCA transformation. The number of +#' axes can be chosen either by using a scree test (criterion 'screetest') +#' looking for the significant marginal increases of explained inertia, or by +#' selecting all axes cumulating 70 percent of explained inertia (criterion +#' '70percents'). +#' +#' +#' @param datSpecies numerical matrix with Logevents as lines and species as +#' columns, with percentage values (between 0 and 100) of each species in the +#' logevent catches. Logevent ID (LE_ID) should be as row names. Typically, +#' this table will be produced from a eflalo dataset using the function +#' extractTableMainSpecies() +#' @param analysisName character, the name of the run. Used for the file name +#' of the plots. +#' @param pcaYesNo character. An indication of whether a PCA should be +#' performed. Use "pca" if a PCA should be run, and "nopca" if it shouldn't. +#' @param criterion character. Criterion used for selected the number of +#' principal components (axes) retained. 
The number of axes can be chosen +#' either by using a scree test (criterion 'screetest'), looking for the +#' second-order unsignificant marginal increases of explained inertia, or by +#' selecting all axes cumulating up to 70 percent of explained inertia +#' (criterion '70percents'). If pcaYesNo="nopca", then this criterion should be +#' set as NULL. Experience has shown that the '70percents' criterion may often +#' select a larger number axes than the scree test criterion, and may therefore +#' be more appropriate for analysing large and heterogeneous datasets, by +#' retaining more information for the subsequent clustering. +#' @return If a PCA is run, a number of graphs are produced and directly saved +#' in the working directory. These graphs describe 1) species projection on the +#' first factorial axes, 2) individuals (logevents) projections on the first +#' factorial axes, 3) actual and cumulative percentage of inertia of factorial +#' axes, and 4) eigenvalues. +#' +#' If a PCA is run, the function returns a numerical matrix with Logevents as +#' lines and selected Principal Components as columns. If no PCA is run, the +#' function returns the same matrix as the input, with percentage values by +#' species. +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @seealso selectMainSpecies(), extractTableMainSpecies() +#' @references Development of tools for logbook and VMS data analysis. 
Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @examples +#' +#' +#' \dontrun{ +#' +#' data(eflalo) +#' +#' eflalo <- formatEflalo(eflalo) +#' +#' eflalo <- eflalo[eflalo$LE_GEAR=="OTB",] +#' # note that output plots will be sent to getwd() +#' analysisName <- "metier_analysis_OTB" +#' +#' dat <- eflalo[,c("LE_ID",grep("EURO",colnames(eflalo),value=TRUE))] +#' names(dat)[-1] <- unlist(lapply(strsplit(names(dat[,-1]),"_"),function(x) x[[3]])) +#' +#' explo <- selectMainSpecies(dat, analysisName, RunHAC=TRUE, DiagFlag=FALSE) +#' #=> send the LE_ID and LE_KG_SP columns only +#' +#' Step1 <- extractTableMainSpecies(dat, explo$NamesMainSpeciesHAC, +#' paramTotal=95, paramLogevent=100) +#' #=> send the LE_ID and LE_KG_SP columns only +#' +#' rowNamesSave <- row.names(Step1) +#' row.names(Step1) <- 1:nrow(Step1) +#' +#' # Run a PCA +#' Step2 <- getTableAfterPCA(Step1, analysisName, pcaYesNo="pca", +#' criterion="70percents") +#' +#' row.names(Step1) <- rowNamesSave +#' row.names(Step2) <- rowNamesSave +#' +#' } +#' +#' +#' @export getTableAfterPCA +getTableAfterPCA = function(datSpecies,analysisName="",pcaYesNo="pca",criterion="70percents"){ + + LE_ID <- rownames(datSpecies) + NbSpecies <- dim(datSpecies)[2] + datSpecies <- as.matrix(datSpecies,ncol=NbSpecies,nrow=length(LE_ID)) + + print("######## STEP 2 PCA/NO PCA ON CATCH PROFILES ########") + + t1 <- Sys.time() + print(paste("--- Selected method :",pcaYesNo, "---")) + + + if(pcaYesNo=="pca"){ + + print("Running PCA on all axes...") + + # PCA (Principal Component Analysis) + log.pca = PCA(datSpecies, graph=FALSE, ncp=ncol(datSpecies)) + + X11(5,5) + plot.PCA(log.pca, choix = "var", axes = 1:2, new.plot=FALSE, title="", lim.cos2.var = 0.1) + savePlot(filename=paste(analysisName,'species_projection_on_the_1_and_2_factorial_axis',sep="_"), type='png') + dev.off() + + X11(5,5) + plot.PCA(log.pca, choix = "var", axes = 1:2, new.plot=FALSE, title="", lim.cos2.var = 0.1) + 
savePlot(filename=paste(analysisName,'species_projection_on_the_1_and_2_factorial_axis',sep="_"), type='png') + dev.off() + + X11(5,5) + plot.PCA(log.pca, choix = "var", axes = 2:3, new.plot=FALSE, title="", lim.cos2.var = 0.1) + savePlot(filename=paste(analysisName,'species_projection_on_the_2_and_3_factorial_axis',sep="_"), type='png') + dev.off() + + X11(5,5) + plot.PCA(log.pca, choix = "ind", axes = 1:2, habillage = "ind", title="", new.plot=FALSE, cex=1.1) + savePlot(filename=paste(analysisName,'projection_of_individuals_on_the_first_two_factorial_axis',sep="_"), type='png') + dev.off() + + + # Determine the number of axis to keep + if(criterion=="70percents"){ + nbaxes=which(log.pca$eig[,3]>70)[1] # we are taking the axis until having 70% of total inertia + cat("--- number of axes:",nbaxes,"\n") + cat("--- percentage inertia explained:",log.pca$eig[nbaxes,3],"\n") + } else + # OR + if(criterion=="screetest"){ + nbaxes=which(scree(log.pca$eig[,1])[,"epsilon"]<0)[2] # thanks to the scree-test + cat("--- number of axes:",nbaxes,"\n") + cat("--- percentage inertia explained:",log.pca$eig[nbaxes,3],"\n") + } else stop("Criterion for PCA must be 70percents or screetest") + + + # Eigenvalues and relative graphics + log.pca$eig + + png(paste(analysisName,"Eigen values.png",sep="_"), width = 1200, height = 800) + x=1:length(log.pca$eig[,1]) + barplot(log.pca$eig[,1],names.arg=x, main="Eigen values") + dev.off() + + png(paste(analysisName,"Percentage of Inertia.png",sep="_"), width = 1200, height = 800) + color=rep("grey",length(log.pca$eig[,1])) + if(criterion=="screetest") color[1:nbaxes]="green" + barplot(log.pca$eig[,2],names.arg=x, col=color, main="Percentage of Inertia of factorial axis", xlab="Axis", ylab="% of Inertia") + dev.off() + + X11(5,5) + op <- par(no.readonly = TRUE) + par(mar=c(4,4,1,1)) + color=rep("grey",length(log.pca$eig[,1])) + if(criterion=="70percents") color[1:nbaxes]="green" + barplot(log.pca$eig[,3], col=color, ylab="", xlab="", axes=FALSE, 
cex.names=2) + axis(2, las=2) + abline(h=70, col="red") + text(1,72, "70% of Inertia", col = "red", adj = c(0, -.1)) + mtext("Axes", side=1, adj=0.5, outer=FALSE, line=+1, font=1, cex=1.5) + mtext("% of Inertia", side=2, adj=0.5, outer=FALSE, line=+2.5, font=1, cex=1.5) + savePlot(filename = paste(analysisName,"Cumulative Percentage of Inertia.png",sep="_"),type ="png") + par(op) + dev.off() + + + # Projection of variables "species" on the first factorial axis + png(paste(analysisName,"Projection of Species on first factorial axis.png",sep="_"), width = 1200, height = 800) + op <- par(mfrow=c(2,3)) + plot(log.pca,choix="var",axes = c(1, 2),new.plot=FALSE,lim.cos2.var = 0.3) + plot(log.pca,choix="var",axes = c(2, 3),new.plot=FALSE,lim.cos2.var = 0.3) + plot(log.pca,choix="var",axes = c(1, 3),new.plot=FALSE,lim.cos2.var = 0.3) + plot(log.pca,choix="var",axes = c(1, 4),new.plot=FALSE,lim.cos2.var = 0.3) + plot(log.pca,choix="var",axes = c(2, 4),new.plot=FALSE,lim.cos2.var = 0.3) + #plot(log.pca,choix="var",axes = c(3, 4),new.plot=FALSE,lim.cos2.var = 0.3) + par(op) + title(main=paste("Projection of Species on first factorial axis","\n","\n",sep="")) + dev.off() + + + # PCA with the good number of axis + print("Retaining Principal Components of selected axes...") + log.pca=log.pca$ind$coord[,1:nbaxes] + datLog=round(log.pca,4) + + + } else + + + if(pcaYesNo=="nopca"){ + datLog=datSpecies + } else stop("pcaYesNo must be pca or nopca") + + Store(objects()) + gc(reset=TRUE) + + print(" --- end of step 2 ---") + print(Sys.time()-t1) + + return(datLog) + +} + diff --git a/vmstools/R/getndp.R b/vmstools/R/getndp.R index 08095e9..b0b6e53 100644 --- a/vmstools/R/getndp.R +++ b/vmstools/R/getndp.R @@ -1,10 +1,26 @@ -`getndp` <- -function(x, tol=2*.Machine$double.eps) -{ - ndp <- 0 - while(!isTRUE(all.equal(x, round(x, ndp), tol=tol))) ndp <- ndp+1 - if(ndp > -log10(tol)) warning("Tolerance reached, ndp possibly -underestimated.") - ndp -} - +#' Get Number of Decimal Places +#' 
+#' Return the number of decimal places of a 'numeric'. +#' +#' +#' @param x Number to find decimal places from +#' @param tol Tolerance to use +#' @note Function not created under EU lot 2 project but found under R-help. +#' Please look there for credits. +#' @author See R-help pages +#' @references EU lot 2 project +#' @examples +#' +#' getndp(5.677) #result: 3 +#' +#' @export getndp +`getndp` <- +function(x, tol=2*.Machine$double.eps) +{ + ndp <- 0 + while(!isTRUE(all.equal(x, round(x, ndp), tol=tol))) ndp <- ndp+1 + if(ndp > -log10(tol)) warning("Tolerance reached, ndp possibly +underestimated.") + ndp +} + diff --git a/vmstools/R/indicators.r b/vmstools/R/indicators.r index 8b445ad..c7c98a1 100644 --- a/vmstools/R/indicators.r +++ b/vmstools/R/indicators.r @@ -1,135 +1,285 @@ -## indicators.r -## by Fabrizio Manco, 14/02/2011 -## calculates the DCF indicators 5,6 or 7 - -indicators <- function ( indicatorNum=5, # indicator 5, 6 or 7 - tacsat, # tacsat-like input data - minThreshold=10, # if time interval has been calculated (and named SI_INTV), it's a minimal nb of minutes, otherwise, it's minimal number of points - pctThreshold=90, # specific to indicator 6, percentage of points to include - ltGear="", # a list of gear code to keep for the analysis /!\ gear code field must be called LE_GEAR - inShapeArea="", # specific to indicator 7, the name of the shapefile without the .shp extension - cellresX=0.05, # grid cell resolution, x axis - cellresY=0.05, # grid cell resolution, y axis - calcAreaMethod="Trapezoid", # "Trapezoid" (fast and less accurate, good for small cellsizes) or "UTM" (accurate but slow, good for huge cellsizes) - plotMapTF=FALSE, - exportGridName="", # if not empty, the monthly (DCF5 and 6) grids will be exported as an ASCII grid (.asc) named with this string, DCF number and month number - exportTableName="" - ) - { - # for all indicators - #remove rows with NA - tacsat<-tacsat[complete.cases(tacsat),] - # gear filtering: intially only 
necessary for DCF 7 ("area impacted by mobile bottom gears"), but might be useful for other indicators - # keep only the gear codes listed in mobileBottomGear, and tacsat must have a LE_GEAR column - if (length(ltGear)>1 & !is.null(tacsat$LE_GEAR)) {tacsat<-subset(tacsat, tacsat$LE_GEAR %in% ltGear)} - - if (indicatorNum==7) - { - #### DCF INDICATOR 7 - Areas not impacted by mobile bottom gears #### - - require(shapefiles) - require(sp) - require(PBSmapping) - - if (inShapeArea!="") - { # read the shapefile - shapeAll<-read.shapefile(inShapeArea) - - # clip the shape polygon with the land - clipShapeFromLand<-clipPolygons (shapeAll, europa) - - vmsPingsCoord<-cbind(tacsat$SI_LONG, tacsat$SI_LATI) - pointInOutByPoly<-rep(0,length(vmsPingsCoord[,1])) - - ltPoly<-unique(clipShapeFromLand$PID) - - # points in polygons - for (x in 1:length(ltPoly)){ - polyCoord<-cbind(clipShapeFromLand$X[clipShapeFromLand$PID==ltPoly[x]],clipShapeFromLand$Y[clipShapeFromLand$PID==ltPoly[x]]) - pointInOutByPoly<-pointInOutByPoly + point.in.polygon(vmsPingsCoord[,1], vmsPingsCoord[,2], polyCoord[,1], polyCoord[,2]) - } - - tacsat$pointInOut<-pointInOutByPoly - tacsat<-subset(tacsat, pointInOut!=0) - } - - # Grid the points - if ("SI_INTV" %in% colnames(tacsat)) { nameVarToSum="SI_INTV"} else {nameVarToSum=""} - if (exportGridName!="") {outGridFileName<-paste(exportGridName,"_DCF",indicatorNum,".asc",sep="")} else {outGridFileName<-""} - vmsGrid<-vmsGridCreate(tacsat, nameLon = "SI_LONG", nameLat = "SI_LATI", cellsizeX=cellresX, cellsizeY=cellresY, nameVarToSum, plotMap=plotMapTF, plotPoints = FALSE, outGridFile=outGridFileName) - - # calculate the area of each cell in square km - vmsGrid<-surface(vmsGrid, method=calcAreaMethod, includeNA=TRUE) - - if (inShapeArea!="") - { - # specify which grid cell is in the polygon - gridPointsCoord<-coordinates(vmsGrid) - gridCellInOutByPoly<-rep(0,length(gridPointsCoord[,1])) - - # cells in polygon - for (x in 1:length(ltPoly)){ - 
polyCoord<-cbind(clipShapeFromLand$X[clipShapeFromLand$PID==ltPoly[x]],clipShapeFromLand$Y[clipShapeFromLand$PID==ltPoly[x]]) - gridCellInOutByPoly<-gridCellInOutByPoly + point.in.polygon(gridPointsCoord[,1], gridPointsCoord[,2], polyCoord[,1], polyCoord[,2]) - } - - vmsGrid$inPolygon<-gridCellInOutByPoly - areaInPolygon<-sum(vmsGrid@data$cellArea[vmsGrid@data$inPolygon==1]) - - } else {areaInPolygon<-sum(vmsGrid@data$cellArea)} # if no shapefile area is provided the entire rectangle containing the pings is processed - - # calculate the areas - areaFishing<-sum(vmsGrid@data$cellArea[!is.na(vmsGrid@data$fishing) & vmsGrid@data$fishing>minThreshold]) - - # calculate the result - tableResultDCF<-areaInPolygon-areaFishing - } - - #### DCF INDICATORS 5 AND 6 #### - if (indicatorNum==5 | indicatorNum==6) - { - # commons for DCF 5 and 6 - if (!"SI_DATIM" %in% colnames(tacsat)) {tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME,sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M")} - ltMonth<-unique(as.numeric(format(tacsat$SI_DATIM, format="%m"))) - ltMonth<-sort(ltMonth) - tableResultDCF=data.frame(month=ltMonth, DCF=rep(0, length(ltMonth))) - - # monthly process - for (x in 1:length(ltMonth)){ - currMonth<-ltMonth[x] - monthlyTacsat<-subset(tacsat, as.numeric(format(tacsat$SI_DATIM, format="%m"))==currMonth) - - if (indicatorNum==5) - { - # specific to DCF 5 - if ("SI_INTV" %in% colnames(tacsat)) {nameVarToSum="SI_INTV"} else {nameVarToSum=""} - } - - if (indicatorNum==6) - { - # specific to DCF 6 - # flag the pings inside the MCP according to the pctThreshold - monthlyTacsat<-tacsatMCP(monthlyTacsat, pctThreshold) - # grid the vms pings inside the MCP - monthlyTacsat<-subset(monthlyTacsat, monthlyTacsat$INMCP!=0) - nameVarToSum="" - } - - # Create the grid - if (plotMapTF) {x11()} - if (exportGridName!="") {outGridFileName<-paste(exportGridName,"_DCF",indicatorNum,"_Month", currMonth,".asc",sep="")} else {outGridFileName<-""} - 
monthlyVmsGrid<-vmsGridCreate(monthlyTacsat, nameLon = "SI_LONG", nameLat = "SI_LATI", cellsizeX=cellresX, cellsizeY=cellresY, nameVarToSum, plotMap=plotMapTF, plotTitle=paste("Month ", currMonth), plotPoints = FALSE, outGridFile=outGridFileName) - if (plotMapTF==TRUE & indicatorNum==6) {plot_mcp(plotnew=FALSE, plotpoints=FALSE, titletxt="")} # plot the specific DCF 6 MCP - - # calculate the area of each cell in square km and store it into a table - monthlyVmsGrid<-surface(monthlyVmsGrid, method=calcAreaMethod, includeNA=FALSE) - tableResultDCF[x,2]<-sum(monthlyVmsGrid@data$cellArea[!is.na(monthlyVmsGrid@data$fishing) & monthlyVmsGrid@data$fishing>minThreshold]) - } - if (exportTableName!="") - { - # export the table as a csv in the wd - write.csv(tableResultDCF, paste(exportTableName, indicatorNum, ".csv", sep="")) - } - } - return(tableResultDCF) - } \ No newline at end of file +## indicators.r +## by Fabrizio Manco, 14/02/2011 +## calculates the DCF indicators 5,6 or 7 + + + +#' Calculate the DCF indicators +#' +#' This function estimates the DCF indicators 5, 6 and 7 from the tacsat +#' dataset. +#' +#' SUMMARY +#' +#' The EU Data Collection Framework (DCF) standardizes three indicators to +#' analyse the fishing activity. They are summarised as follow: +#' +#' DCF Indicator 5: Distribution of fishing activities. The spatial extent of +#' fishing activity based on the total area of grids within which VMS records +#' were obtained, each month; DCF Indicator 6: Aggregation of fishing +#' activities. The extent to which fishing activity is aggregated based on the +#' total area of grids within which 90 percent of VMS records were obtained, +#' each month. DCF Indicator 7: Areas not impacted by mobile bottom gears. The +#' area of seabed that has not been impacted by mobile bottom fishing gears in +#' the last year. Could be reported annually and would state the total +#' proportion of the area by depth strata in each marine region. 
+#' +#' METHODS +#' +#' These indicators aggregate the tacsat point data into a gridded data frame +#' using the functions mapGrid.r and vmsGridCreate.r and therefore the +#' resolution of the grid (cell size) must be defined. +#' +#' DCF 5 calculates the total area of a grid of cells with fishing activity +#' which is above a minimum threshold of number of pings or number of fishing +#' hours (if the tacsat data contains a field with time interval between two +#' points called SI_INTV, then the threshold will be a minimal number of hours, +#' otherwise it will be a minimal number of points). The area of each cell is +#' calculated with the function surface.r either via a fast and rough method +#' using a trapezoid approximation (option "Trapezoid"), either via a more +#' precise but slow method using a Universal Transverse Mercator projection +#' (option "UTM"). The first method is fine for big grids of small cell sizes, +#' the second one is better for large cell sizes. This total fishing area is +#' processed by month. +#' +#' DCF 6 also calculates the total area of a grid with fishing activity but +#' keeps only the 90 percent of the points by discarding the outer 10\% points +#' (or any other specified percentage). It uses the function tacsatMCP.r +#' adapted from the aspace library. This function draws a minimum convex +#' polygon around the central points to keep. Then these points are gridded and +#' the total area of the cells is calculated with the surface.r function with +#' the same optional methods as DCF 5. This total fishing area is processed by +#' month. +#' +#' DCF 7 calculates the total area of a specified polygon not impacted by +#' mobile bottom gear. It therefore needs that the tacsat data has been merged +#' with the logbooks in order to have a gear code (or others) for each vms +#' point. The indicator needs a list of gear code to include as mobile bottom +#' gears (if empty, all the points will be included). 
The specified area to be +#' processed is a polygon shapefile. This polygon (or group of polygons) is +#' then clipped with the Europe polygon to be sure that the indicator won't +#' include land in its area calculation. If no shapefile is defined, the area +#' of the bounding box containing all the vms points will be considered. The +#' result is the area of the polygon less the area of the grid where fishing +#' activity occurs. The vms pings are gridded with an optional threshold in +#' either minimal of fishing hours or minimal number of points (see DCF 5). The +#' area of each grid cell is calculated with the surface.r function (see DCF 5 +#' or DCF 6). +#' +#' @param indicatorNum The indicator's number (5,6 or 7) +#' @param tacsat The vms dataframe with tacsat format +#' @param minThreshold The threshold value to consider a cell being "fished" in +#' time if the ping time interval has been calculated and named SI_INTV, +#' otherwise in number of pings +#' @param pctThreshold The threshold value representing the percentage of +#' points to include in the Minimal Convex Polygon for Indicator 6 +#' @param ltGear The list of gear codes to consider, if a gear code is present +#' in the tacsat dataframe and named LE_GEAR +#' @param inShapeArea The input shapefile to consider for DCF Indicator 7; path +#' and namefile without the extension (.shp) +#' @param cellresX The cell size along axis X +#' @param cellresY The cell size along axis Y +#' @param calcAreaMethod The method used to calculate the cell area, can be +#' "Trapezoid" (quick and less acurate) or "UTM" (slow and accurate) +#' @param plotMapTF Plot the maps +#' @param exportGridName If mentionned, each grid will be exported as a ASCII +#' grid +#' @param exportTableName Name of the csv file containing the results +#' @return For DCF Indicator 5: a list of monthly areas is returned, saved (if +#' exportTableName is populated) and monthly grids are exported (if +#' exportGridName is populated) For DCF 
Indicator 6: a list of monthly areas is +#' returned, saved (if exportTableName is populated) and monthly grids are +#' exported (if exportGridName is populated) For DCF Indicator 7: a annual +#' value is returned and a grid is exported (if exportGridName is populated) +#' @author Fabrizio Manco +#' @seealso \code{\link{mapGrids}} \code{\link{vmsGridCreate}} +#' \code{\link{tacsatMCP}} \code{\link{surface}} +#' @references EU lot 2 project +#' @examples +#' +#' # load the library +#' library(vmstools) +#' # Load the tacsat data +#' data(tacsat) +#' +#' # process the tacsat data: +#' # mandatory if you want the gridding based on a time threshold +#' # (minimal number of hours) +#' #pointInHarbour.r +#' #filterTacsat.r +#' #intervalTacsat.r +#' +#' +#' # load the eflalo +#' data(eflalo) +#' # merge eflalo and tacsat # mandatory for DCF Indicator 7 to +#' # consider only a list of gear codes +#' +#' # DCF Indicator 5 +#' indicators(indicatorNum=5, +#' tacsat, +#' minThreshold=0, +#' cellresX=0.05, +#' cellresY=0.05, +#' calcAreaMethod="Trapezoid", +#' plotMapTF=TRUE, +#' exportTableName="", +#' exportGridName="") +#' +#' # DCF Indicator 6 +#' indicators(indicatorNum=6, +#' tacsat, +#' pctThreshold=90, +#' cellresX=0.05, +#' cellresY=0.05, +#' calcAreaMethod="Trapezoid", +#' plotMapTF=TRUE, +#' exportTableName="", +#' exportGridName="") +#' +#' # DCF Indicator 7 +#' \dontrun{ +#' indicators(indicatorNum=7, +#' tacsat, +#' ltGear=c("TBB","OTB","PTB","DRB","DRH"), +#' inShapeArea="Shapefile", +#' cellresX=0.05, +#' cellresY=0.05, +#' calcAreaMethod="Trapezoid", +#' minThreshold=0, +#' plotMapTF=TRUE, +#' exportGridName="") +#' } +#' +#' @export indicators +indicators <- function ( indicatorNum=5, # indicator 5, 6 or 7 + tacsat, # tacsat-like input data + minThreshold=10, # if time interval has been calculated (and named SI_INTV), it's a minimal nb of minutes, otherwise, it's minimal number of points + pctThreshold=90, # specific to indicator 6, percentage of points to 
include + ltGear="", # a list of gear code to keep for the analysis /!\ gear code field must be called LE_GEAR + inShapeArea="", # specific to indicator 7, the name of the shapefile without the .shp extension + cellresX=0.05, # grid cell resolution, x axis + cellresY=0.05, # grid cell resolution, y axis + calcAreaMethod="Trapezoid", # "Trapezoid" (fast and less accurate, good for small cellsizes) or "UTM" (accurate but slow, good for huge cellsizes) + plotMapTF=FALSE, + exportGridName="", # if not empty, the monthly (DCF5 and 6) grids will be exported as an ASCII grid (.asc) named with this string, DCF number and month number + exportTableName="" + ) + { + # for all indicators + #remove rows with NA + tacsat<-tacsat[complete.cases(tacsat),] + # gear filtering: intially only necessary for DCF 7 ("area impacted by mobile bottom gears"), but might be useful for other indicators + # keep only the gear codes listed in mobileBottomGear, and tacsat must have a LE_GEAR column + if (length(ltGear)>1 & !is.null(tacsat$LE_GEAR)) {tacsat<-subset(tacsat, tacsat$LE_GEAR %in% ltGear)} + + if (indicatorNum==7) + { + #### DCF INDICATOR 7 - Areas not impacted by mobile bottom gears #### + + require(shapefiles) + require(sp) + require(PBSmapping) + + if (inShapeArea!="") + { # read the shapefile + shapeAll<-read.shapefile(inShapeArea) + + # clip the shape polygon with the land + clipShapeFromLand<-clipPolygons (shapeAll, europa) + + vmsPingsCoord<-cbind(tacsat$SI_LONG, tacsat$SI_LATI) + pointInOutByPoly<-rep(0,length(vmsPingsCoord[,1])) + + ltPoly<-unique(clipShapeFromLand$PID) + + # points in polygons + for (x in 1:length(ltPoly)){ + polyCoord<-cbind(clipShapeFromLand$X[clipShapeFromLand$PID==ltPoly[x]],clipShapeFromLand$Y[clipShapeFromLand$PID==ltPoly[x]]) + pointInOutByPoly<-pointInOutByPoly + point.in.polygon(vmsPingsCoord[,1], vmsPingsCoord[,2], polyCoord[,1], polyCoord[,2]) + } + + tacsat$pointInOut<-pointInOutByPoly + tacsat<-subset(tacsat, pointInOut!=0) + } + + # Grid the 
points + if ("SI_INTV" %in% colnames(tacsat)) { nameVarToSum="SI_INTV"} else {nameVarToSum=""} + if (exportGridName!="") {outGridFileName<-paste(exportGridName,"_DCF",indicatorNum,".asc",sep="")} else {outGridFileName<-""} + vmsGrid<-vmsGridCreate(tacsat, nameLon = "SI_LONG", nameLat = "SI_LATI", cellsizeX=cellresX, cellsizeY=cellresY, nameVarToSum, plotMap=plotMapTF, plotPoints = FALSE, outGridFile=outGridFileName) + + # calculate the area of each cell in square km + vmsGrid<-surface(vmsGrid, method=calcAreaMethod, includeNA=TRUE) + + if (inShapeArea!="") + { + # specify which grid cell is in the polygon + gridPointsCoord<-coordinates(vmsGrid) + gridCellInOutByPoly<-rep(0,length(gridPointsCoord[,1])) + + # cells in polygon + for (x in 1:length(ltPoly)){ + polyCoord<-cbind(clipShapeFromLand$X[clipShapeFromLand$PID==ltPoly[x]],clipShapeFromLand$Y[clipShapeFromLand$PID==ltPoly[x]]) + gridCellInOutByPoly<-gridCellInOutByPoly + point.in.polygon(gridPointsCoord[,1], gridPointsCoord[,2], polyCoord[,1], polyCoord[,2]) + } + + vmsGrid$inPolygon<-gridCellInOutByPoly + areaInPolygon<-sum(vmsGrid@data$cellArea[vmsGrid@data$inPolygon==1]) + + } else {areaInPolygon<-sum(vmsGrid@data$cellArea)} # if no shapefile area is provided the entire rectangle containing the pings is processed + + # calculate the areas + areaFishing<-sum(vmsGrid@data$cellArea[!is.na(vmsGrid@data$fishing) & vmsGrid@data$fishing>minThreshold]) + + # calculate the result + tableResultDCF<-areaInPolygon-areaFishing + } + + #### DCF INDICATORS 5 AND 6 #### + if (indicatorNum==5 | indicatorNum==6) + { + # commons for DCF 5 and 6 + if (!"SI_DATIM" %in% colnames(tacsat)) {tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME,sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M")} + ltMonth<-unique(as.numeric(format(tacsat$SI_DATIM, format="%m"))) + ltMonth<-sort(ltMonth) + tableResultDCF=data.frame(month=ltMonth, DCF=rep(0, length(ltMonth))) + + # monthly process + for (x in 1:length(ltMonth)){ + 
currMonth<-ltMonth[x] + monthlyTacsat<-subset(tacsat, as.numeric(format(tacsat$SI_DATIM, format="%m"))==currMonth) + + if (indicatorNum==5) + { + # specific to DCF 5 + if ("SI_INTV" %in% colnames(tacsat)) {nameVarToSum="SI_INTV"} else {nameVarToSum=""} + } + + if (indicatorNum==6) + { + # specific to DCF 6 + # flag the pings inside the MCP according to the pctThreshold + monthlyTacsat<-tacsatMCP(monthlyTacsat, pctThreshold) + # grid the vms pings inside the MCP + monthlyTacsat<-subset(monthlyTacsat, monthlyTacsat$INMCP!=0) + nameVarToSum="" + } + + # Create the grid + if (plotMapTF) {x11()} + if (exportGridName!="") {outGridFileName<-paste(exportGridName,"_DCF",indicatorNum,"_Month", currMonth,".asc",sep="")} else {outGridFileName<-""} + monthlyVmsGrid<-vmsGridCreate(monthlyTacsat, nameLon = "SI_LONG", nameLat = "SI_LATI", cellsizeX=cellresX, cellsizeY=cellresY, nameVarToSum, plotMap=plotMapTF, plotTitle=paste("Month ", currMonth), plotPoints = FALSE, outGridFile=outGridFileName) + if (plotMapTF==TRUE & indicatorNum==6) {plot_mcp(plotnew=FALSE, plotpoints=FALSE, titletxt="")} # plot the specific DCF 6 MCP + + # calculate the area of each cell in square km and store it into a table + monthlyVmsGrid<-surface(monthlyVmsGrid, method=calcAreaMethod, includeNA=FALSE) + tableResultDCF[x,2]<-sum(monthlyVmsGrid@data$cellArea[!is.na(monthlyVmsGrid@data$fishing) & monthlyVmsGrid@data$fishing>minThreshold]) + } + if (exportTableName!="") + { + # export the table as a csv in the wd + write.csv(tableResultDCF, paste(exportTableName, indicatorNum, ".csv", sep="")) + } + } + return(tableResultDCF) + } diff --git a/vmstools/R/interCubicHermiteSpline.r b/vmstools/R/interCubicHermiteSpline.r index df24ebe..2253bb2 100644 --- a/vmstools/R/interCubicHermiteSpline.r +++ b/vmstools/R/interCubicHermiteSpline.r @@ -1,44 +1,57 @@ - -interCubicHermiteSpline <- function(spltx,spltCon,res,params,headingAdjustment){ - - #Formula of Cubic Hermite Spline - t <- seq(0,1,length.out=res) - F00 <- 
2*t^3 -3*t^2 + 1 - F10 <- t^3-2*t^2+t - F01 <- -2*t^3+3*t^2 - F11 <- t^3-t^2 - - #Making tacsat dataset ready - spltx[spltCon[,1],"SI_HE"][which(is.na(spltx[spltCon[,1],"SI_HE"]))] <- 0 - spltx[spltCon[,2],"SI_HE"][which(is.na(spltx[spltCon[,2],"SI_HE"]))] <- 0 - - #Heading at begin point in degrees - Hx0 <- sin(spltx[spltCon[,1],"SI_HE"]/(180/pi)) - Hy0 <- cos(spltx[spltCon[,1],"SI_HE"]/(180/pi)) - - #Heading at end point in degrees - Hx1 <- sin(spltx[spltCon[,2]-headingAdjustment,"SI_HE"]/(180/pi)) - Hy1 <- cos(spltx[spltCon[,2]-headingAdjustment,"SI_HE"]/(180/pi)) - - #Start and end positions - Mx0 <- spltx[spltCon[,1],"SI_LONG"] - Mx1 <- spltx[spltCon[,2],"SI_LONG"] - My0 <- spltx[spltCon[,1],"SI_LATI"] - My1 <- spltx[spltCon[,2],"SI_LATI"] - - #Corrected for longitude lattitude effect - Hx0 <- Hx0 * params$fm * spltx[spltCon[,1],"SI_SP"] /((params$st[2]-params$st[1])/2+params$st[1]) - Hx1 <- Hx1 * params$fm * spltx[spltCon[,2],"SI_SP"] /((params$st[2]-params$st[1])/2+params$st[1]) - Hy0 <- Hy0 * params$fm * lonLatRatio(spltx[spltCon[,1],"SI_LONG"],spltx[spltCon[,1],"SI_LATI"]) * spltx[spltCon[,1],"SI_SP"]/((params$st[2]-params$st[1])/2+params$st[1]) - Hy1 <- Hy1 * params$fm * lonLatRatio(spltx[spltCon[,2],"SI_LONG"],spltx[spltCon[,2],"SI_LATI"]) * spltx[spltCon[,2],"SI_SP"]/((params$st[2]-params$st[1])/2+params$st[1]) - - #Get the interpolation - fx <- outer(F00,Mx0,"*")+outer(F10,Hx0,"*")+outer(F01,Mx1,"*")+outer(F11,Hx1,"*") - fy <- outer(F00,My0,"*")+outer(F10,Hy0,"*")+outer(F01,My1,"*")+outer(F11,Hy1,"*") - - #Create output format - intsx <- lapply(as.list(1:nrow(spltCon)),function(x){ - matrix(rbind(spltx$ID[spltCon[x,]],cbind(fx[,x],fy[,x])),ncol=2, - dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y")))}) - return(intsx)} - +#' Interpolate using Cubic Hermite Spline +#' +#' This is an internal function of interpolateTacsat. Function to interpolate +#' VMS data using a cubic hermite spline function. 
+#' +#' +#' @param spltx Tacsat dataset split up by vessel using 'split'. +#' @param spltCon List of VMS records that are connected +#' @param res Number of additional points to use for interpolation +#' @param params Parameters to use for interpolation +#' @param headingAdjustment Parameter to adjust the choice of heading depending +#' on its own or previous point +#' @author Niels T. Hintzen +#' @export interCubicHermiteSpline +interCubicHermiteSpline <- function(spltx,spltCon,res,params,headingAdjustment){ + + #Formula of Cubic Hermite Spline + t <- seq(0,1,length.out=res) + F00 <- 2*t^3 -3*t^2 + 1 + F10 <- t^3-2*t^2+t + F01 <- -2*t^3+3*t^2 + F11 <- t^3-t^2 + + #Making tacsat dataset ready + spltx[spltCon[,1],"SI_HE"][which(is.na(spltx[spltCon[,1],"SI_HE"]))] <- 0 + spltx[spltCon[,2],"SI_HE"][which(is.na(spltx[spltCon[,2],"SI_HE"]))] <- 0 + + #Heading at begin point in degrees + Hx0 <- sin(spltx[spltCon[,1],"SI_HE"]/(180/pi)) + Hy0 <- cos(spltx[spltCon[,1],"SI_HE"]/(180/pi)) + + #Heading at end point in degrees + Hx1 <- sin(spltx[spltCon[,2]-headingAdjustment,"SI_HE"]/(180/pi)) + Hy1 <- cos(spltx[spltCon[,2]-headingAdjustment,"SI_HE"]/(180/pi)) + + #Start and end positions + Mx0 <- spltx[spltCon[,1],"SI_LONG"] + Mx1 <- spltx[spltCon[,2],"SI_LONG"] + My0 <- spltx[spltCon[,1],"SI_LATI"] + My1 <- spltx[spltCon[,2],"SI_LATI"] + + #Corrected for longitude lattitude effect + Hx0 <- Hx0 * params$fm * spltx[spltCon[,1],"SI_SP"] /((params$st[2]-params$st[1])/2+params$st[1]) + Hx1 <- Hx1 * params$fm * spltx[spltCon[,2],"SI_SP"] /((params$st[2]-params$st[1])/2+params$st[1]) + Hy0 <- Hy0 * params$fm * lonLatRatio(spltx[spltCon[,1],"SI_LONG"],spltx[spltCon[,1],"SI_LATI"]) * spltx[spltCon[,1],"SI_SP"]/((params$st[2]-params$st[1])/2+params$st[1]) + Hy1 <- Hy1 * params$fm * lonLatRatio(spltx[spltCon[,2],"SI_LONG"],spltx[spltCon[,2],"SI_LATI"]) * spltx[spltCon[,2],"SI_SP"]/((params$st[2]-params$st[1])/2+params$st[1]) + + #Get the interpolation + fx <- 
outer(F00,Mx0,"*")+outer(F10,Hx0,"*")+outer(F01,Mx1,"*")+outer(F11,Hx1,"*") + fy <- outer(F00,My0,"*")+outer(F10,Hy0,"*")+outer(F01,My1,"*")+outer(F11,Hy1,"*") + + #Create output format + intsx <- lapply(as.list(1:nrow(spltCon)),function(x){ + matrix(rbind(spltx$ID[spltCon[x,]],cbind(fx[,x],fy[,x])),ncol=2, + dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y")))}) + return(intsx)} + diff --git a/vmstools/R/interStraightLine.r b/vmstools/R/interStraightLine.r index 1f5de2b..013e5dd 100644 --- a/vmstools/R/interStraightLine.r +++ b/vmstools/R/interStraightLine.r @@ -1,12 +1,22 @@ - -interStraightLine <- function(spltx,spltCon,res){ - - fx <- mapply(seq,spltx[spltCon[,1],"SI_LONG"],spltx[spltCon[,2],"SI_LONG"],length.out=res) - fy <- mapply(seq,spltx[spltCon[,1],"SI_LATI"],spltx[spltCon[,2],"SI_LATI"],length.out=res) - - #Create output format - intsx <- lapply(as.list(1:nrow(spltCon)),function(x){ - matrix(rbind(spltx$ID[spltCon[x,]],cbind(fx[,x],fy[,x])),ncol=2, - dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y")))}) - return(intsx)} - +#' Interpolate using Straight line +#' +#' This is an internal function of interpolateTacsat. Function to interpolate +#' VMS data using a straight line. +#' +#' +#' @param spltx Tacsat dataset split up by vessel using 'split'. +#' @param spltCon List of VMS records that are connected +#' @param res Number of additional points to use for interpolation +#' @author Niels T. 
Hintzen +#' @export interStraightLine +interStraightLine <- function(spltx,spltCon,res){ + + fx <- mapply(seq,spltx[spltCon[,1],"SI_LONG"],spltx[spltCon[,2],"SI_LONG"],length.out=res) + fy <- mapply(seq,spltx[spltCon[,1],"SI_LATI"],spltx[spltCon[,2],"SI_LATI"],length.out=res) + + #Create output format + intsx <- lapply(as.list(1:nrow(spltCon)),function(x){ + matrix(rbind(spltx$ID[spltCon[x,]],cbind(fx[,x],fy[,x])),ncol=2, + dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y")))}) + return(intsx)} + diff --git a/vmstools/R/interpolateTacsat.r b/vmstools/R/interpolateTacsat.r index eadd097..9c3f6a1 100644 --- a/vmstools/R/interpolateTacsat.r +++ b/vmstools/R/interpolateTacsat.r @@ -1,155 +1,227 @@ -`interpolateTacsat` <- -function(tacsat #VMS datapoints - ,interval=120 #Specify in minutes, NULL means use all points - ,margin=12 #Specify the margin in minutes that the interval might deviate in a search for the next point - ,res=100 #Resolution of interpolation method (default = 100) - ,method="cHs" #Specify the method to be used: Straight line (SL) of cubic Hermite spline (cHs) - ,params=list(fm=0.5,distscale=20,sigline=0.2,st=c(2,6)) #Specify the three parameters: fm, distscale, sigline, speedthreshold - ,headingAdjustment=0 - ,fast=FALSE){ - -if(!"SI_DATIM" %in% colnames(tacsat)) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") -tacsat$sort.int <- 1:nrow(tacsat) -tacsat <- sortTacsat(tacsat) -if(any(diff(tacsat$sort.int)!=1)) stop("Sort tacsat first using 'sortTacsat'") - #Start interpolating the data -if(!method %in% c("cHs","SL")) stop("method selected that does not exist") - -#------------------------------------------------------------------------------- -#Fast method or not -#------------------------------------------------------------------------------- -if(fast){ - #Interpolation only by vessel, so split tacsat up - tacsat$ID <- 1:nrow(tacsat) - splitTa <- split(tacsat,tacsat$VE_REF) - 
spltTaCon <- lapply(splitTa,function(spltx){ - #Calculate time different between every record - dftimex <- outer(spltx$SI_DATIM,spltx$SI_DATIM,difftime,units="mins") - iStep <- 1 - connect <- list() - counter <- 1 - #Loop over all possible combinations and store if a connection can be made - while(iStep <= nrow(spltx)){ - endp <- which(dftimex[,iStep] >= (interval - margin) & dftimex[,iStep] <= (interval + margin)) - if(length(endp)>0){ - if(length(endp)>1) endp <- endp[which.min(abs(interval - dftimex[endp,iStep]))][1] - connect[[counter]] <- c(iStep,endp) - counter <- counter + 1 - iStep <- endp - } else { iStep <- iStep + 1} - } - #Return matrix of conenctions - return(do.call(rbind,connect))}) - - if(method=="cHs") returnInterpolations <- unlist(lapply(as.list(names(unlist(lapply(spltTaCon,nrow)))),function(y){ - return(interCubicHermiteSpline(spltx=splitTa[[y]],spltCon=spltTaCon[[y]],res,params,headingAdjustment))}),recursive=FALSE) - if(method=="SL") returnInterpolations <- unlist(lapply(as.list(names(unlist(lapply(spltTaCon,nrow)))),function(y){ - return(interStraightLine(splitTa[[y]],spltTaCon[[y]],res))}),recursive=FALSE) - -} else { - - - #Initiate returning result object - returnInterpolations <- list() - - #Make vectors out of tacsat data to speed up interpolation - VE_REF <- tacsat$VE_REF - SI_LATI <- tacsat$SI_LATI - SI_LONG <- tacsat$SI_LONG - SI_SP <- tacsat$SI_SP - SI_HE <- tacsat$SI_HE - SI_DATIM <- tacsat$SI_DATIM - - #Start iterating over succeeding points - for(iStep in 1:(dim(tacsat)[1]-1)){ - #for(iStep in 1:(4558)){ - #print(iStep) - if(iStep == 1){ - iSuccess <- 0 - endDataSet <- 0 - startVMS <- 1 - ship <- VE_REF[startVMS] - } else { - if(is.na(endVMS)==TRUE) endVMS <- startVMS + 1 - startVMS <- endVMS - ship <- VE_REF[startVMS] - if(endDataSet == 1 & (rev(unique(VE_REF))[1] == ship | startVMS > length(VE_REF))) endDataSet <- 2 #Final end of dataset - } - - #if end of dataset is not reached, try to find succeeding point - if(endDataSet != 
2){ - idx <- which(VE_REF == VE_REF[startVMS]) - startidx <- which(idx == startVMS) - result <- findEndTacsat(SI_DATIM[idx],startVMS=startidx,interval,margin) - endVMS <- result[1]+idx[startidx] - endDataSet <- result[2] - - if(startVMS == dim(tacsat)[1] | (startVMS+1 == dim(tacsat)[1] & VE_REF[startVMS] != VE_REF[startVMS+1])){ - endDataSet <- 1 - endVMS <- NA - } - - if(is.na(endVMS)==TRUE) int <- 0 #No interpolation possible - if(is.na(endVMS)==FALSE) int <- 1 #Interpolation possible - #Interpolate according to the Cubic Hermite Spline method - if(method == "cHs" & int == 1){ - - #Define the cHs formula - F00 <- numeric() - F10 <- numeric() - F01 <- numeric() - F11 <- numeric() - i <- 0 - t <- seq(0,1,length.out=res) - F00 <- 2*t^3 -3*t^2 + 1 - F10 <- t^3-2*t^2+t - F01 <- -2*t^3+3*t^2 - F11 <- t^3-t^2 - - if (is.na(SI_HE[startVMS])=="TRUE") SI_HE[startVMS] <- 0 - if (is.na(SI_HE[endVMS])=="TRUE") SI_HE[endVMS] <- 0 - - #Heading at begin point in degrees - Hx0 <- sin(SI_HE[startVMS]/(180/pi)) - Hy0 <- cos(SI_HE[startVMS]/(180/pi)) - #Heading at end point in degrees - Hx1 <- sin(SI_HE[endVMS-headingAdjustment]/(180/pi)) - Hy1 <- cos(SI_HE[endVMS-headingAdjustment]/(180/pi)) - - Mx0 <- SI_LONG[startVMS] - Mx1 <- SI_LONG[endVMS] - My0 <- SI_LATI[startVMS] - My1 <- SI_LATI[endVMS] - - #Corrected for longitude lattitude effect - Hx0 <- Hx0 * params$fm * SI_SP[startVMS] /((params$st[2]-params$st[1])/2+params$st[1]) - Hx1 <- Hx1 * params$fm * SI_SP[endVMS] /((params$st[2]-params$st[1])/2+params$st[1]) - Hy0 <- Hy0 * params$fm * lonLatRatio(SI_LONG[c(startVMS,endVMS)],SI_LATI[c(startVMS,endVMS)])[1] * SI_SP[startVMS]/((params$st[2]-params$st[1])/2+params$st[1]) - Hy1 <- Hy1 * params$fm * lonLatRatio(SI_LONG[c(startVMS,endVMS)],SI_LATI[c(startVMS,endVMS)])[2] * SI_SP[endVMS]/((params$st[2]-params$st[1]) /2+params$st[1]) - - #Finalizing the interpolation based on cHs - fx <- numeric() - fy <- numeric() - fx <- F00*Mx0+F10*Hx0+F01*Mx1+F11*Hx1 - fy <- 
F00*My0+F10*Hy0+F01*My1+F11*Hy1 - - #Add one to list of successful interpolations - iSuccess <- iSuccess + 1 - returnInterpolations[[iSuccess]] <- matrix(rbind(c(startVMS,endVMS),cbind(fx,fy)),ncol=2,dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y"))) - } - - #Interpolate according to a straight line - if(method == "SL" & int == 1){ - fx <- seq(SI_LONG[startVMS],SI_LONG[endVMS],length.out=res) - fy <- seq(SI_LATI[startVMS],SI_LATI[endVMS],length.out=res) - - #Add one to list of successful interpolations - iSuccess <- iSuccess + 1 - returnInterpolations[[iSuccess]] <- matrix(rbind(c(startVMS,endVMS),cbind(fx,fy)),ncol=2,dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y"))) - } - } - } -} - -return(returnInterpolations)} - +#' Find interpolated points between two succeeding GPS datapoints +#' +#' Method to find interpolated points between two succeeding Tacsat / GPS +#' datapoints. Two different methods can be applied: the cubic Hermite spline +#' method as developed in Hintzen et al. 2010 or an interpolation of a straight +#' line (on a x-y plane, not a sphere) +#' +#' Interval: In most Tacsat datasets the succeeding datapoint can be found 1 or +#' 2 hours appart. This interval time should be specified here. Interval can +#' also be specified as e.g. 15 minutes if the Tacsat / GPS dataset allows +#' this. Margin: Hardly ever, the interval time is precise. To allow some +#' deviation from the 1 or 2 hour interval the margin can be adjusted. +#' +#' The 'res' value defines the number of points to use to construct the +#' interpolation. Default = 100 which results in a high resolution +#' interpolation. +#' +#' Params can be obtained from analyses (see Hintzen et al. 2010) or can be +#' varied to explore the differences. To interpolate only first value needs to +#' be specified and the speedtreshold needs to be given. 
+#' +#' headingAdjustment can either be 0 or 1 in case the heading at the endpoint +#' does not represent the heading of the arriving vessel to that point but the +#' departing vessel. This is only of use when heading needs to be calculated +#' outside the GPS dataset where heading is calculated at the bearing between +#' two points. For the startpoint, the bearing is correct, for the endpoint +#' however, the heading is incorrect. +#' +#' The object returned is a list. Each list item contains in the first row the +#' index of the succeeding Tacsat points interpolated. The second to last rows +#' contain the x (first column) and y (second column) interpolated values. The +#' length of the list indicates the number of interpolations created. +#' +#' @param tacsat Tacsat dataset with GPS points that need to be interpolated +#' @param interval Time in minutes between the succeeding datapoints +#' @param margin Deviation from specified interval to find succeeding +#' datapoints +#' @param res Number of points to use to create interpolation (including start +#' and end point), default = 100 +#' @param method "cHs" for cubic Hermite spline or "SL" for Straight Line +#' interpolation, default = "cHs" +#' @param params Params needed for cHs method, including the speed thresholds +#' used +#' @param headingAdjustment Parameter to adjust the choice of heading depending +#' on its own or previous point +#' @param fast If your machine holds enough RAM (4gig or more) then the fast +#' version runs 7x faster +#' @author Niels T. Hintzen +#' @seealso \code{\link{findEndTacsat}}, \code{\link{distance}}, +#' \code{\link{lonLatRatio}}, \code{\link{plotInterpolation}}, +#' \code{\link{sortTacsat}}, \code{\link{filterTacsat}} +#' @references Hintzen et al. 
2010 Improved estimation of trawling tracks using +#' cubic Hermite spline interpolation of position registration data, EU lot 2 +#' project +#' @examples +#' +#' #Load the Tacsat data +#' data(tacsat) +#' +#' #Sort the VMS data +#' tacsat <- sortTacsat(tacsat) +#' tacsat <- tacsat[1:1000,] +#' +#' #Filter the Tacsat data +#' tacsat <- filterTacsat(tacsat,st=c(2,6),hd=NULL) +#' +#' #Interpolate the VMS data +#' interpolation <- interpolateTacsat(tacsat,interval=120,margin=10,res=100, +#' method="cHs",params=list(fm=0.5,distscale=20, +#' sigline=0.2,st=c(2,6)),headingAdjustment=0) +#' +#' @export interpolateTacsat +`interpolateTacsat` <- +function(tacsat #VMS datapoints + ,interval=120 #Specify in minutes, NULL means use all points + ,margin=12 #Specify the margin in minutes that the interval might deviate in a search for the next point + ,res=100 #Resolution of interpolation method (default = 100) + ,method="cHs" #Specify the method to be used: Straight line (SL) of cubic Hermite spline (cHs) + ,params=list(fm=0.5,distscale=20,sigline=0.2,st=c(2,6)) #Specify the three parameters: fm, distscale, sigline, speedthreshold + ,headingAdjustment=0 + ,fast=FALSE){ + +if(!"SI_DATIM" %in% colnames(tacsat)) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") +tacsat$sort.int <- 1:nrow(tacsat) +tacsat <- sortTacsat(tacsat) +if(any(diff(tacsat$sort.int)!=1)) stop("Sort tacsat first using 'sortTacsat'") + #Start interpolating the data +if(!method %in% c("cHs","SL")) stop("method selected that does not exist") + +#------------------------------------------------------------------------------- +#Fast method or not +#------------------------------------------------------------------------------- +if(fast){ + #Interpolation only by vessel, so split tacsat up + tacsat$ID <- 1:nrow(tacsat) + splitTa <- split(tacsat,tacsat$VE_REF) + spltTaCon <- lapply(splitTa,function(spltx){ + #Calculate time different between every record 
+ dftimex <- outer(spltx$SI_DATIM,spltx$SI_DATIM,difftime,units="mins") + iStep <- 1 + connect <- list() + counter <- 1 + #Loop over all possible combinations and store if a connection can be made + while(iStep <= nrow(spltx)){ + endp <- which(dftimex[,iStep] >= (interval - margin) & dftimex[,iStep] <= (interval + margin)) + if(length(endp)>0){ + if(length(endp)>1) endp <- endp[which.min(abs(interval - dftimex[endp,iStep]))][1] + connect[[counter]] <- c(iStep,endp) + counter <- counter + 1 + iStep <- endp + } else { iStep <- iStep + 1} + } + #Return matrix of conenctions + return(do.call(rbind,connect))}) + + if(method=="cHs") returnInterpolations <- unlist(lapply(as.list(names(unlist(lapply(spltTaCon,nrow)))),function(y){ + return(interCubicHermiteSpline(spltx=splitTa[[y]],spltCon=spltTaCon[[y]],res,params,headingAdjustment))}),recursive=FALSE) + if(method=="SL") returnInterpolations <- unlist(lapply(as.list(names(unlist(lapply(spltTaCon,nrow)))),function(y){ + return(interStraightLine(splitTa[[y]],spltTaCon[[y]],res))}),recursive=FALSE) + +} else { + + + #Initiate returning result object + returnInterpolations <- list() + + #Make vectors out of tacsat data to speed up interpolation + VE_REF <- tacsat$VE_REF + SI_LATI <- tacsat$SI_LATI + SI_LONG <- tacsat$SI_LONG + SI_SP <- tacsat$SI_SP + SI_HE <- tacsat$SI_HE + SI_DATIM <- tacsat$SI_DATIM + + #Start iterating over succeeding points + for(iStep in 1:(dim(tacsat)[1]-1)){ + #for(iStep in 1:(4558)){ + #print(iStep) + if(iStep == 1){ + iSuccess <- 0 + endDataSet <- 0 + startVMS <- 1 + ship <- VE_REF[startVMS] + } else { + if(is.na(endVMS)==TRUE) endVMS <- startVMS + 1 + startVMS <- endVMS + ship <- VE_REF[startVMS] + if(endDataSet == 1 & (rev(unique(VE_REF))[1] == ship | startVMS > length(VE_REF))) endDataSet <- 2 #Final end of dataset + } + + #if end of dataset is not reached, try to find succeeding point + if(endDataSet != 2){ + idx <- which(VE_REF == VE_REF[startVMS]) + startidx <- which(idx == startVMS) + result 
<- findEndTacsat(SI_DATIM[idx],startVMS=startidx,interval,margin) + endVMS <- result[1]+idx[startidx] + endDataSet <- result[2] + + if(startVMS == dim(tacsat)[1] | (startVMS+1 == dim(tacsat)[1] & VE_REF[startVMS] != VE_REF[startVMS+1])){ + endDataSet <- 1 + endVMS <- NA + } + + if(is.na(endVMS)==TRUE) int <- 0 #No interpolation possible + if(is.na(endVMS)==FALSE) int <- 1 #Interpolation possible + #Interpolate according to the Cubic Hermite Spline method + if(method == "cHs" & int == 1){ + + #Define the cHs formula + F00 <- numeric() + F10 <- numeric() + F01 <- numeric() + F11 <- numeric() + i <- 0 + t <- seq(0,1,length.out=res) + F00 <- 2*t^3 -3*t^2 + 1 + F10 <- t^3-2*t^2+t + F01 <- -2*t^3+3*t^2 + F11 <- t^3-t^2 + + if (is.na(SI_HE[startVMS])=="TRUE") SI_HE[startVMS] <- 0 + if (is.na(SI_HE[endVMS])=="TRUE") SI_HE[endVMS] <- 0 + + #Heading at begin point in degrees + Hx0 <- sin(SI_HE[startVMS]/(180/pi)) + Hy0 <- cos(SI_HE[startVMS]/(180/pi)) + #Heading at end point in degrees + Hx1 <- sin(SI_HE[endVMS-headingAdjustment]/(180/pi)) + Hy1 <- cos(SI_HE[endVMS-headingAdjustment]/(180/pi)) + + Mx0 <- SI_LONG[startVMS] + Mx1 <- SI_LONG[endVMS] + My0 <- SI_LATI[startVMS] + My1 <- SI_LATI[endVMS] + + #Corrected for longitude lattitude effect + Hx0 <- Hx0 * params$fm * SI_SP[startVMS] /((params$st[2]-params$st[1])/2+params$st[1]) + Hx1 <- Hx1 * params$fm * SI_SP[endVMS] /((params$st[2]-params$st[1])/2+params$st[1]) + Hy0 <- Hy0 * params$fm * lonLatRatio(SI_LONG[c(startVMS,endVMS)],SI_LATI[c(startVMS,endVMS)])[1] * SI_SP[startVMS]/((params$st[2]-params$st[1])/2+params$st[1]) + Hy1 <- Hy1 * params$fm * lonLatRatio(SI_LONG[c(startVMS,endVMS)],SI_LATI[c(startVMS,endVMS)])[2] * SI_SP[endVMS]/((params$st[2]-params$st[1]) /2+params$st[1]) + + #Finalizing the interpolation based on cHs + fx <- numeric() + fy <- numeric() + fx <- F00*Mx0+F10*Hx0+F01*Mx1+F11*Hx1 + fy <- F00*My0+F10*Hy0+F01*My1+F11*Hy1 + + #Add one to list of successful interpolations + iSuccess <- iSuccess + 1 + 
returnInterpolations[[iSuccess]] <- matrix(rbind(c(startVMS,endVMS),cbind(fx,fy)),ncol=2,dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y"))) + } + + #Interpolate according to a straight line + if(method == "SL" & int == 1){ + fx <- seq(SI_LONG[startVMS],SI_LONG[endVMS],length.out=res) + fy <- seq(SI_LATI[startVMS],SI_LATI[endVMS],length.out=res) + + #Add one to list of successful interpolations + iSuccess <- iSuccess + 1 + returnInterpolations[[iSuccess]] <- matrix(rbind(c(startVMS,endVMS),cbind(fx,fy)),ncol=2,dimnames=list(c("startendVMS",seq(1,res,1)),c("x","y"))) + } + } + } +} + +return(returnInterpolations)} + diff --git a/vmstools/R/interpolation2Tacsat.r b/vmstools/R/interpolation2Tacsat.r index efef156..7dbbd29 100644 --- a/vmstools/R/interpolation2Tacsat.r +++ b/vmstools/R/interpolation2Tacsat.r @@ -1,158 +1,206 @@ -interpolation2Tacsat <- function (interpolation, tacsat, npoints = 10, equalDist = TRUE) -{ - tacsat <- sortTacsat(tacsat) - if (!"HL_ID" %in% colnames(tacsat)) - tacsat$HL_ID <- 1:nrow(tacsat) - if (!"SI_DATIM" %in% colnames(tacsat)) - tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, - sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M") - - #- Convert the interpolation to equally distance spaced points - if (equalDist) { - interpolationEQ <- equalDistance(interpolation, npoints) - } else { - interpolationEQ <- lapply(interpolation, function(x) { - idx <- round(seq(2, nrow(x), length.out = npoints)) - return(x[c(1, idx), ]) - }) - } - #- Get the interpolated longitudes and latitudes out - int <- lapply(interpolationEQ, function(x) { - x[-1, ] - }) - lenint <- lapply(int, length) - idx <- which(unlist(lenint) == (npoints * 2)) - int <- lapply(interpolationEQ[idx], function(x) { - x[-1, ] - }) - lenEQ <- length(interpolationEQ[idx]) - intmin2 <- lapply(int, function(x) { - x <- x[-c(1, nrow(x)), ] - colnames(x) <- c("SI_LONG", "SI_LATI") - return(x) - }) - intidx <- lapply(interpolationEQ[idx], function(x) { - x[1, ] - }) - 
- #- Make a unique ID per interpolation - INT_ID <- as.data.frame(do.call(rbind,intidx)) - INT_ID$INT_ID <- 1:nrow(INT_ID) - tacsat$INT_ID <- NA - for(iRow in 1:nrow(INT_ID)) - tacsat[INT_ID[iRow,1]:INT_ID[iRow,2],"INT_ID"] <- INT_ID$INT_ID[iRow] - - #- Aggregate tacsat information for all pings within an interpolation - idxNoIntTacsat <- which(is.na(tacsat$INT_ID)) - if(length(idxNoIntTacsat)>0){ - noIntTacsat <- tacsat[idxNoIntTacsat,] - intTacsat <- tacsat[-idxNoIntTacsat,] - - matchClnms <- c("INTV","LE_SURF","LE_SUBSURF",colnames(tacsat)[kgeur(colnames(tacsat))])[which(c("INTV","LE_SURF","LE_SUBSURF",colnames(tacsat)[kgeur(colnames(tacsat))]) %in% colnames(tacsat))] - aggInttacsat<- aggregate(tacsat[-idxNoIntTacsat,matchClnms],by=list(tacsat[-idxNoIntTacsat,"INT_ID"]),FUN=sum,na.rm=T) - colnames(aggInttacsat)[1] <- "INT_ID" - } else { - noIntTacsat <- numeric() - intTacsat <- tacsat - matchClnms <- c("INTV","LE_SURF","LE_SUBSURF",colnames(tacsat)[kgeur(colnames(tacsat))])[which(c("INTV","LE_SURF","LE_SUBSURF",colnames(tacsat)[kgeur(colnames(tacsat))]) %in% colnames(tacsat))] - aggInttacsat<- aggregate(tacsat[,matchClnms],by=list(tacsat[,"INT_ID"]),FUN=sum,na.rm=T) - colnames(aggInttacsat)[1] <- "INT_ID" - } - - #- get the column names that cannot be interpolated - clnames <- colnames(tacsat) - b <- clnames[!clnames %in% c("SI_LONG", "SI_LATI", - "SI_HE", "SI_SP", "SI_DATE", "SI_TIME", - "SI_DATIM")] - - #- Replicate values that cannot be interpolated - bvals <- lapply(as.list(1:lenEQ), function(x) { - matrix(unlist(tacsat[intidx[[x]][1], b]), nrow = npoints - - 2, ncol = length(b), byrow = TRUE, dimnames = list(round(seq(intidx[[x]][1], - intidx[[x]][2], length.out = npoints - 2), 3), b)) - }) - #- Apply a simple equal speading to the data values (Ideally we would make this dependent on SI_SP but then the equalDist should be modified too) - bvals <- lapply(as.list(1:lenEQ),function(x){ - dat2replace <- subset(aggInttacsat,INT_ID == bvals[[x]][1,"INT_ID"]) - 
if(length(kgeur(b))>0){ - for(iSpec in b[kgeur(b)]) - bvals[[x]][,iSpec] <- c(dat2replace[,iSpec]/(npoints-1)) - mode(bvals[[x]][,kgeur(b)]) <- "numeric" - } - if("INTV" %in% b){ - bvals[[x]][,"INTV"] <- c(dat2replace[,"INTV"]/(npoints-1)) - mode(bvals[[x]][,"INTV"]) <- "numeric" - } - if("LE_SURF" %in% b){ - bvals[[x]][,"LE_SURF"] <- c(dat2replace[,"LE_SURF"]/(npoints-1)) - mode(bvals[[x]][,"LE_SURF"]) <- "numeric" - } - if("LE_SUBSURF" %in% b){ - bvals[[x]][,"LE_SUBSURF"] <- c(dat2replace[,"LE_SUBSURF"]/(npoints-1)) - mode(bvals[[x]][,"LE_SUBSURF"]) <- "numeric" - } - return(bvals[[x]]) - }) - - SI_DATIMs <- data.frame(from = tacsat$SI_DATIM[do.call(rbind, - intidx)[, 1]], to = tacsat$SI_DATIM[do.call(rbind, intidx)[, - 2]]) - SI_DATIMs <- lapply(as.list(1:lenEQ), function(x) { - seq(SI_DATIMs[x, 1], SI_DATIMs[x, 2], length.out = npoints)[2:(npoints - - 1)] - }) - SI_DATE <- lapply(SI_DATIMs, function(x) { - format(x, format = "%d/%m/%Y") - }) - timeNotation <- ifelse(length(unlist(strsplit(tacsat$SI_TIME[1], - ":"))) > 2, "secs", "mins") - if (timeNotation == "secs") - SI_TIME <- lapply(SI_DATIMs, function(x) { - format(x, format = "%H:%M:%S") - }) - if (timeNotation == "mins") - SI_TIME <- lapply(SI_DATIMs, function(x) { - format(x, format = "%H:%M") - }) - SI_SPs <- as.matrix(data.frame(from = tacsat$SI_SP[do.call(rbind, - intidx)[, 1]], to = tacsat$SI_SP[do.call(rbind, intidx)[, - 2]])) - SI_SP <- mapply(seq, from = SI_SPs[, 1], to = SI_SPs[, 2], - length.out = npoints - 2) - SI_HE <- lapply(as.list(1:lenEQ), function(x) { - y <- int[[x]] - return(bearing(y[2:(nrow(y) - 1), 1], y[2:(nrow(y) - - 1), 2], y[3:nrow(y), 1], y[3:nrow(y), 2])) - }) - ret <- lapply(as.list(1:lenEQ), function(x) { - data.frame(bvals[[x]], intmin2[[x]], SI_DATIM = SI_DATIMs[[x]], - SI_DATE = SI_DATE[[x]], SI_TIME = SI_TIME[[x]], SI_SP = SI_SP[, - x], SI_HE = SI_HE[[x]], stringsAsFactors = F) - }) - - #- Need to apply the same equal spreading to the original tacsat data - matchClnms <- 
c("INTV","LE_SURF","LE_SUBSURF", - colnames(intTacsat)[kgeur(colnames(intTacsat))])[which(c("INTV","LE_SURF","LE_SUBSURF",colnames(intTacsat)[kgeur(colnames(intTacsat))]) %in% colnames(intTacsat))] - intTacsat[,matchClnms] <- intTacsat[,matchClnms] / (npoints-1) - - #- Combine all together again - interpolationTot <- do.call(rbind, ret) - interpolationTot <- formatTacsat(interpolationTot) - if(length(noIntTacsat)>0){ - tacsatInt <- rbindTacsat(rbindTacsat(intTacsat,noIntTacsat),interpolationTot) - } else { - tacsatInt <- rbindTacsat(intTacsat,interpolationTot) - } - for(iCol in c("INTV","LE_SURF","LE_SUBSURF",colnames(tacsatInt)[kgeur(colnames(tacsatInt))])) - tacsatInt[,iCol] <- anf(tacsatInt[,iCol]) - tacsatInt <- sortTacsat(tacsatInt) - - #- Do consistency check - if(length(kgeur(colnames(tacsatInt)))>0){ - ratio <- sum(tacsatInt[,kgeur(colnames(tacsatInt))],na.rm=T)/sum(tacsat[,kgeur(colnames(tacsatInt))],na.rm=T) - if(ratio <= 0.99 | ratio >= 1.01 | is.na(ratio)) - warnings("Some kilo/euros/surface area swept is misplaced") - } - - return(tacsatInt) -} +#' Convert the output (a list) from interpolateTacsat to tacsat +#' +#' This function takes the list of tracks output by interpolateTacsat and +#' converts them back to tacsat format. This means that all the functions that +#' work on tacsat can work with this new, larger interpolated dataset. For +#' example you could now distribute the landings among the new pings. The +#' npoints argument is the optional number of pings between each real or actual +#' ping. +#' +#' The distances between the new vessel positions are assumed to be equal. +#' +#' @param interpolation A list of tracks between actual pings output by the +#' interpolateTacsat function. +#' @param tacsat Tacsat data used when the interpolateTacsat function was +#' called. +#' @param npoints The number of pings or positions required between each real +#' or actual vessel position or ping +#' @param equalDist Logical. 
Whether the number of positions returned should be +#' equally spaced or not +#' @return A data frame in tacsat format is returned with estimated locations, +#' speeds and headings between each real or actual vessel position. The column +#' added HL_ID refers to the ID given to each individual interpolation. +#' @author Niels Hintzen, Doug Beare +#' @seealso \code{\link{interpolateTacsat}} +#' @references EU Lot 2 project +#' @examples +#' +#' +#' data(tacsat) +#' # Take out first 100 rows +#' input <- tacsat[1:100,] +#' # set resolution for number of locations or pings required +#' # between each real or actual vessel location. +#' res1 <- 100 +#' +#' #Do the interpolation +#' interpolation <- interpolateTacsat(tacsat=input,interval=120, +#' margin=12,res=res1,method="cHs", +#' params=list(fm=0.4,distscale=20,sigline=0.2, +#' st=c(4,8)),headingAdjustment=0) +#' +#' #Convert the interpolated data back to tacsat with 10 locations or +#' # pings between each real or actual vessel location. 
+#' tacsat <- interpolation2Tacsat(interpolation=interpolation, +#' tacsat=input,npoints=10,equalDist=TRUE) +#' +#' +#' @export interpolation2Tacsat +interpolation2Tacsat <- function (interpolation, tacsat, npoints = 10, equalDist = TRUE) +{ + tacsat <- sortTacsat(tacsat) + if (!"HL_ID" %in% colnames(tacsat)) + tacsat$HL_ID <- 1:nrow(tacsat) + if (!"SI_DATIM" %in% colnames(tacsat)) + tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, + sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M") + + #- Convert the interpolation to equally distance spaced points + if (equalDist) { + interpolationEQ <- equalDistance(interpolation, npoints) + } else { + interpolationEQ <- lapply(interpolation, function(x) { + idx <- round(seq(2, nrow(x), length.out = npoints)) + return(x[c(1, idx), ]) + }) + } + #- Get the interpolated longitudes and latitudes out + int <- lapply(interpolationEQ, function(x) { + x[-1, ] + }) + lenint <- lapply(int, length) + idx <- which(unlist(lenint) == (npoints * 2)) + int <- lapply(interpolationEQ[idx], function(x) { + x[-1, ] + }) + lenEQ <- length(interpolationEQ[idx]) + intmin2 <- lapply(int, function(x) { + x <- x[-c(1, nrow(x)), ] + colnames(x) <- c("SI_LONG", "SI_LATI") + return(x) + }) + intidx <- lapply(interpolationEQ[idx], function(x) { + x[1, ] + }) + + #- Make a unique ID per interpolation + INT_ID <- as.data.frame(do.call(rbind,intidx)) + INT_ID$INT_ID <- 1:nrow(INT_ID) + tacsat$INT_ID <- NA + for(iRow in 1:nrow(INT_ID)) + tacsat[INT_ID[iRow,1]:INT_ID[iRow,2],"INT_ID"] <- INT_ID$INT_ID[iRow] + + #- Aggregate tacsat information for all pings within an interpolation + idxNoIntTacsat <- which(is.na(tacsat$INT_ID)) + if(length(idxNoIntTacsat)>0){ + noIntTacsat <- tacsat[idxNoIntTacsat,] + intTacsat <- tacsat[-idxNoIntTacsat,] + + matchClnms <- c("INTV","LE_SURF","LE_SUBSURF",colnames(tacsat)[kgeur(colnames(tacsat))])[which(c("INTV","LE_SURF","LE_SUBSURF",colnames(tacsat)[kgeur(colnames(tacsat))]) %in% colnames(tacsat))] + 
aggInttacsat<- aggregate(tacsat[-idxNoIntTacsat,matchClnms],by=list(tacsat[-idxNoIntTacsat,"INT_ID"]),FUN=sum,na.rm=T) + colnames(aggInttacsat)[1] <- "INT_ID" + } else { + noIntTacsat <- numeric() + intTacsat <- tacsat + matchClnms <- c("INTV","LE_SURF","LE_SUBSURF",colnames(tacsat)[kgeur(colnames(tacsat))])[which(c("INTV","LE_SURF","LE_SUBSURF",colnames(tacsat)[kgeur(colnames(tacsat))]) %in% colnames(tacsat))] + aggInttacsat<- aggregate(tacsat[,matchClnms],by=list(tacsat[,"INT_ID"]),FUN=sum,na.rm=T) + colnames(aggInttacsat)[1] <- "INT_ID" + } + + #- get the column names that cannot be interpolated + clnames <- colnames(tacsat) + b <- clnames[!clnames %in% c("SI_LONG", "SI_LATI", + "SI_HE", "SI_SP", "SI_DATE", "SI_TIME", + "SI_DATIM")] + + #- Replicate values that cannot be interpolated + bvals <- lapply(as.list(1:lenEQ), function(x) { + matrix(unlist(tacsat[intidx[[x]][1], b]), nrow = npoints - + 2, ncol = length(b), byrow = TRUE, dimnames = list(round(seq(intidx[[x]][1], + intidx[[x]][2], length.out = npoints - 2), 3), b)) + }) + #- Apply a simple equal speading to the data values (Ideally we would make this dependent on SI_SP but then the equalDist should be modified too) + bvals <- lapply(as.list(1:lenEQ),function(x){ + dat2replace <- subset(aggInttacsat,INT_ID == bvals[[x]][1,"INT_ID"]) + if(length(kgeur(b))>0){ + for(iSpec in b[kgeur(b)]) + bvals[[x]][,iSpec] <- c(dat2replace[,iSpec]/(npoints-1)) + mode(bvals[[x]][,kgeur(b)]) <- "numeric" + } + if("INTV" %in% b){ + bvals[[x]][,"INTV"] <- c(dat2replace[,"INTV"]/(npoints-1)) + mode(bvals[[x]][,"INTV"]) <- "numeric" + } + if("LE_SURF" %in% b){ + bvals[[x]][,"LE_SURF"] <- c(dat2replace[,"LE_SURF"]/(npoints-1)) + mode(bvals[[x]][,"LE_SURF"]) <- "numeric" + } + if("LE_SUBSURF" %in% b){ + bvals[[x]][,"LE_SUBSURF"] <- c(dat2replace[,"LE_SUBSURF"]/(npoints-1)) + mode(bvals[[x]][,"LE_SUBSURF"]) <- "numeric" + } + return(bvals[[x]]) + }) + + SI_DATIMs <- data.frame(from = tacsat$SI_DATIM[do.call(rbind, + intidx)[, 1]], 
to = tacsat$SI_DATIM[do.call(rbind, intidx)[, + 2]]) + SI_DATIMs <- lapply(as.list(1:lenEQ), function(x) { + seq(SI_DATIMs[x, 1], SI_DATIMs[x, 2], length.out = npoints)[2:(npoints - + 1)] + }) + SI_DATE <- lapply(SI_DATIMs, function(x) { + format(x, format = "%d/%m/%Y") + }) + timeNotation <- ifelse(length(unlist(strsplit(tacsat$SI_TIME[1], + ":"))) > 2, "secs", "mins") + if (timeNotation == "secs") + SI_TIME <- lapply(SI_DATIMs, function(x) { + format(x, format = "%H:%M:%S") + }) + if (timeNotation == "mins") + SI_TIME <- lapply(SI_DATIMs, function(x) { + format(x, format = "%H:%M") + }) + SI_SPs <- as.matrix(data.frame(from = tacsat$SI_SP[do.call(rbind, + intidx)[, 1]], to = tacsat$SI_SP[do.call(rbind, intidx)[, + 2]])) + SI_SP <- mapply(seq, from = SI_SPs[, 1], to = SI_SPs[, 2], + length.out = npoints - 2) + SI_HE <- lapply(as.list(1:lenEQ), function(x) { + y <- int[[x]] + return(bearing(y[2:(nrow(y) - 1), 1], y[2:(nrow(y) - + 1), 2], y[3:nrow(y), 1], y[3:nrow(y), 2])) + }) + ret <- lapply(as.list(1:lenEQ), function(x) { + data.frame(bvals[[x]], intmin2[[x]], SI_DATIM = SI_DATIMs[[x]], + SI_DATE = SI_DATE[[x]], SI_TIME = SI_TIME[[x]], SI_SP = SI_SP[, + x], SI_HE = SI_HE[[x]], stringsAsFactors = F) + }) + + #- Need to apply the same equal spreading to the original tacsat data + matchClnms <- c("INTV","LE_SURF","LE_SUBSURF", + colnames(intTacsat)[kgeur(colnames(intTacsat))])[which(c("INTV","LE_SURF","LE_SUBSURF",colnames(intTacsat)[kgeur(colnames(intTacsat))]) %in% colnames(intTacsat))] + intTacsat[,matchClnms] <- intTacsat[,matchClnms] / (npoints-1) + + #- Combine all together again + interpolationTot <- do.call(rbind, ret) + interpolationTot <- formatTacsat(interpolationTot) + if(length(noIntTacsat)>0){ + tacsatInt <- rbindTacsat(rbindTacsat(intTacsat,noIntTacsat),interpolationTot) + } else { + tacsatInt <- rbindTacsat(intTacsat,interpolationTot) + } + for(iCol in c("INTV","LE_SURF","LE_SUBSURF",colnames(tacsatInt)[kgeur(colnames(tacsatInt))])) + 
tacsatInt[,iCol] <- anf(tacsatInt[,iCol]) + tacsatInt <- sortTacsat(tacsatInt) + + #- Do consistency check + if(length(kgeur(colnames(tacsatInt)))>0){ + ratio <- sum(tacsatInt[,kgeur(colnames(tacsatInt))],na.rm=T)/sum(tacsat[,kgeur(colnames(tacsatInt))],na.rm=T) + if(ratio <= 0.99 | ratio >= 1.01 | is.na(ratio)) + warnings("Some kilo/euros/surface area swept is misplaced") + } + + return(tacsatInt) +} diff --git a/vmstools/R/intervalTacsat.r b/vmstools/R/intervalTacsat.r index 4439647..cdd2830 100644 --- a/vmstools/R/intervalTacsat.r +++ b/vmstools/R/intervalTacsat.r @@ -1,72 +1,109 @@ -intervalTacsat <- function(tacsat,level="trip",weight=c(1,0),fill.na=FALSE){ - - if(length(weight) != 2) stop("weight must be specified as a length 2 numeric vector") - weight <- weight / sum(weight,na.rm=TRUE) - - #- First sort the tacsat dataset - tacsat <- sortTacsat(tacsat) - - #- Add date-time stamp - if(!"SI_DATIM" %in% colnames(tacsat)) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") - - #- If a trip level is specified, the interval rate can be calculated by trip (to make sure that no long - # interval rates between trips occur in comparison to by level="vessel" - if(level=="trip"){ - if(is.null(tacsat$FT_REF)==TRUE) stop("no tripnumber available to merge on trip level") - sptacsat <- split(tacsat,tacsat$VE_REF) - tacsat$INTV <- unlist(lapply(sptacsat,function(x){ - - FT_REF <- as.factor(x$FT_REF); - res <- by(x,FT_REF, - function(y){ - if(nrow(y)>1){ - difftime_xmin1 <- c(NA,difftime(y$SI_DATIM[2:nrow(y)],y$SI_DATIM[1:(nrow(y)-1)],units="mins")) - difftime_xplus1 <- c(difftime_xmin1[-1],NA) - if(any(weight == 0)){ - if(weight[1] == 0) INTV <- difftime_xplus1 - if(weight[2] == 0) INTV <- difftime_xmin1 - } else { INTV <- rowSums(cbind(difftime_xmin1*weight[1],difftime_xplus1*weight[2])) - } - #- If INTV equals NA, then check if there are other possibilities to calculate the interval rate based on a different - # 
weight setting. - if(fill.na==TRUE){ - idx <- which(is.na(INTV)==TRUE) - INTV[idx] <- rowSums(cbind(difftime_xmin1[idx],difftime_xplus1[idx]),na.rm=TRUE) - INTV[idx][which(INTV[idx]==0)]<- NA - } - - return(INTV) - } else { - return(NA) - } - }) - return(unsplit(res,FT_REF))})) - tacsat$INTV[which(tacsat$FT_REF == "0")] <- NA - } - #- If no trip level is specified, the other option is to calculate interval rate by vessel - if(level=="vessel"){ - difftime_xmin1 <- c(NA,difftime(tacsat$SI_DATIM[2:nrow(tacsat)],tacsat$SI_DATIM[1:(nrow(tacsat)-1)],units="mins")) - difftime_xplus1 <- c(difftime_xmin1[-1],NA) - if(any(weight == 0)){ - if(weight[1] == 0) INTV <- difftime_xplus1 - if(weight[2] == 0) INTV <- difftime_xmin1 - } else { INTV <- rowSums(cbind(difftime_xmin1*weight[1],difftime_xplus1*weight[2])) - } - #- If INTV equals NA, then check if there are other possibilities to calculate the interval rate based on a different - # weight setting. - if(fill.na==TRUE){ - idx <- which(is.na(INTV)==TRUE) - INTV[idx] <- rowSums(cbind(difftime_xmin1[idx],difftime_xplus1[idx]),na.rm=TRUE) - INTV[idx][which(INTV[idx]==0)]<- NA - } - tacsat$INTV <- INTV - - vessels <- unique(tacsat$VE_REF) - first.vessels <- unlist(lapply(as.list(vessels),function(x){which(tacsat$VE_REF==x)[1]})) - last.vessels <- unlist(lapply(as.list(vessels),function(x){rev(which(tacsat$VE_REF==x))[1]})) - if(weight[1] != 0) tacsat$INTV[first.vessels] <- NA - if(weight[2] != 0) tacsat$INTV[last.vessels] <- NA - if(fill.na==TRUE) tacsat$INTV[first.vessels] <- difftime_xplus1[first.vessels] - if(fill.na==TRUE) tacsat$INTV[last.vessels] <- difftime_xmin1[last.vessels] - } - return(tacsat)} \ No newline at end of file +#' Return the interval time between pings +#' +#' Return the interval time between pings of one vessel or at the trip level. +#' Interval is calculated based on ping x and ping x-1 or ping x and ping x+1. +#' +#' Note that the DEFAULT interval given is the difference between ping x and +#' ping x-1. 
Hence, the first ping of a vessel or trip does NOT have an +#' interval rate and will display NA. +#' +#' With weight you can specify if the interval rate is used between ping x and +#' ping x-1 (weight = c(1,0)), if the interval rate is used between ping x and +#' ping x+1 (weight = c(0,1)) or an intermediate weight (weight = c(0.4,0.6) / +#' equal weight = c(0.5,0.5)). +#' +#' @param tacsat tacsat dataset +#' @param level level to get interval rate at: trip or vessel +#' @param weight weight to apply to calculation of mean interval rate towards +#' and away from ping +#' @param fill.na If interval rate cannot be calculated based on default or +#' provided weight, take closest alternative to provide an interval rate +#' @return The original tacsat file is returned including a column: INTV which +#' holds the interval rate in minutes +#' @author Niels T. Hintzen +#' @seealso \code{\link{interpolateTacsat}},\code{\link{calculateSpeed}} +#' @references EU lot 2 project +#' @examples +#' +#' data(tacsat) +#' result <- intervalTacsat(tacsat[1:100,],level="vessel") +#' result <- intervalTacsat(tacsat[1:100,],level="vessel",weight=c(2,1),fill.na=TRUE) +#' +#' data(eflalo) +#' tacsatp <- mergeEflalo2Tacsat(eflalo,tacsat) +#' result <- intervalTacsat(tacsatp[1:100,],level="trip",weight=c(1,1),fill.na=FALSE) +#' +#' +#' @export intervalTacsat +intervalTacsat <- function(tacsat,level="trip",weight=c(1,0),fill.na=FALSE){ + + if(length(weight) != 2) stop("weight must be specified as a length 2 numeric vector") + weight <- weight / sum(weight,na.rm=TRUE) + + #- First sort the tacsat dataset + tacsat <- sortTacsat(tacsat) + + #- Add date-time stamp + if(!"SI_DATIM" %in% colnames(tacsat)) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") + + #- If a trip level is specified, the interval rate can be calculated by trip (to make sure that no long + # interval rates between trips occur in comparison to by level="vessel" + 
if(level=="trip"){ + if(is.null(tacsat$FT_REF)==TRUE) stop("no tripnumber available to merge on trip level") + sptacsat <- split(tacsat,tacsat$VE_REF) + tacsat$INTV <- unlist(lapply(sptacsat,function(x){ + + FT_REF <- as.factor(x$FT_REF); + res <- by(x,FT_REF, + function(y){ + if(nrow(y)>1){ + difftime_xmin1 <- c(NA,difftime(y$SI_DATIM[2:nrow(y)],y$SI_DATIM[1:(nrow(y)-1)],units="mins")) + difftime_xplus1 <- c(difftime_xmin1[-1],NA) + if(any(weight == 0)){ + if(weight[1] == 0) INTV <- difftime_xplus1 + if(weight[2] == 0) INTV <- difftime_xmin1 + } else { INTV <- rowSums(cbind(difftime_xmin1*weight[1],difftime_xplus1*weight[2])) + } + #- If INTV equals NA, then check if there are other possibilities to calculate the interval rate based on a different + # weight setting. + if(fill.na==TRUE){ + idx <- which(is.na(INTV)==TRUE) + INTV[idx] <- rowSums(cbind(difftime_xmin1[idx],difftime_xplus1[idx]),na.rm=TRUE) + INTV[idx][which(INTV[idx]==0)]<- NA + } + + return(INTV) + } else { + return(NA) + } + }) + return(unsplit(res,FT_REF))})) + tacsat$INTV[which(tacsat$FT_REF == "0")] <- NA + } + #- If no trip level is specified, the other option is to calculate interval rate by vessel + if(level=="vessel"){ + difftime_xmin1 <- c(NA,difftime(tacsat$SI_DATIM[2:nrow(tacsat)],tacsat$SI_DATIM[1:(nrow(tacsat)-1)],units="mins")) + difftime_xplus1 <- c(difftime_xmin1[-1],NA) + if(any(weight == 0)){ + if(weight[1] == 0) INTV <- difftime_xplus1 + if(weight[2] == 0) INTV <- difftime_xmin1 + } else { INTV <- rowSums(cbind(difftime_xmin1*weight[1],difftime_xplus1*weight[2])) + } + #- If INTV equals NA, then check if there are other possibilities to calculate the interval rate based on a different + # weight setting. 
+ if(fill.na==TRUE){ + idx <- which(is.na(INTV)==TRUE) + INTV[idx] <- rowSums(cbind(difftime_xmin1[idx],difftime_xplus1[idx]),na.rm=TRUE) + INTV[idx][which(INTV[idx]==0)]<- NA + } + tacsat$INTV <- INTV + + vessels <- unique(tacsat$VE_REF) + first.vessels <- unlist(lapply(as.list(vessels),function(x){which(tacsat$VE_REF==x)[1]})) + last.vessels <- unlist(lapply(as.list(vessels),function(x){rev(which(tacsat$VE_REF==x))[1]})) + if(weight[1] != 0) tacsat$INTV[first.vessels] <- NA + if(weight[2] != 0) tacsat$INTV[last.vessels] <- NA + if(fill.na==TRUE) tacsat$INTV[first.vessels] <- difftime_xplus1[first.vessels] + if(fill.na==TRUE) tacsat$INTV[last.vessels] <- difftime_xmin1[last.vessels] + } + return(tacsat)} diff --git a/vmstools/R/kgeur.r b/vmstools/R/kgeur.r index 10e93e2..3382f9d 100644 --- a/vmstools/R/kgeur.r +++ b/vmstools/R/kgeur.r @@ -1 +1,17 @@ -kgeur <- function(x){return(c(grep("KG",x),grep("EURO",x)))} \ No newline at end of file +#' Return kg and euro column index of eflalo dataset +#' +#' Returns the index of the columns with kg and euro information from a given +#' eflalo dataset +#' +#' +#' @param x Colnames of eflalo dataset (or any other dataset with column names +#' as LE_KG_ and LE_EURO_) +#' @author Niels T. Hintzen +#' @references EU Lot 2 project +#' @examples +#' +#' data(eflalo) +#' kgeur(colnames(eflalo)) +#' +#' @export kgeur +kgeur <- function(x){return(c(grep("KG",x),grep("EURO",x)))} diff --git a/vmstools/R/km2Degree.R b/vmstools/R/km2Degree.R index 615a646..53924ea 100644 --- a/vmstools/R/km2Degree.R +++ b/vmstools/R/km2Degree.R @@ -1,12 +1,35 @@ -`km2Degree` <- -function(lon,lat,km){ - x1 <- lon - y1 <- lat - - a <- cos(y1*pi/180)*cos(y1*pi/180)*sin((1*pi/180)/2)*sin((1*pi/180)/2); - c <- 2*atan2(sqrt(a),sqrt(1-a)); - R <- 6371; - dx1 <- R*c - - return(km / dx1)} - +#' Compute distance from kilometers into degrees +#' +#' Function transformes the distance expressed in kilometers into degrees. 
This is +#' based on the GPS location of a point. +#' +#' +#' @param lon Longitude of the GPS position +#' @param lat Latitude of the GPS position +#' @param km Value in Km to turn into degrees +#' @note Computation of degrees is approximation based on the Haversine formula +#' @author Niels T. Hintzen +#' @seealso \code{\link{distance}}, \code{\link{degree2Km}}, +#' \code{\link{lonLatRatio}} +#' @references EU lot 2 project +#' @examples +#' +#' lon <- -4 +#' lat <- 50 +#' km <- 114.4897 +#' +#' km2Degree(lon,lat,km) #1.601833 +#' +#' @export km2Degree +`km2Degree` <- +function(lon,lat,km){ + x1 <- lon + y1 <- lat + + a <- cos(y1*pi/180)*cos(y1*pi/180)*sin((1*pi/180)/2)*sin((1*pi/180)/2); + c <- 2*atan2(sqrt(a),sqrt(1-a)); + R <- 6371; + dx1 <- R*c + + return(km / dx1)} + diff --git a/vmstools/R/labellingHauls.r b/vmstools/R/labellingHauls.r index e32682a..b335748 100644 --- a/vmstools/R/labellingHauls.r +++ b/vmstools/R/labellingHauls.r @@ -1,36 +1,59 @@ - -# assign an identifier in'HL_ID' to each of the fishing sequences -# (based on SI_STATE, assuming the "h", "f", "s" coding) -# (useful to count them in a grid...) -labellingHauls <- function(tacsat){ - tacsat$SI_STATE2 <- tacsat$SI_STATE - tacsat$SI_STATE <- as.character(tacsat$SI_STATE) - tacsat[is.na(tacsat$SI_STATE), 'SI_STATE'] <- '1' # assign steaming - tacsat[tacsat$SI_STATE!='f', 'SI_STATE'] <- '1' # assign steaming - tacsat[tacsat$SI_STATE=='f', 'SI_STATE'] <- '2' # assign fishing - tacsat$SI_STATE <- as.numeric(tacsat$SI_STATE) - tacsat$HL_ID <- c(0, diff(tacsat$SI_STATE)) # init - tacsat$HL_ID <- cumsum(tacsat$HL_ID) # fishing sequences detected. - tacsat$SS_ID <- 1- tacsat$HL_ID # steaming sequences detected. - tacsat$HL_ID <- cumsum(tacsat$SS_ID ) # fishing sequences labelled. 
- tacsat[tacsat$SI_STATE==1, 'HL_ID'] <- 0 # correct label 0 for steaming - tacsat$HL_ID <- factor(tacsat$HL_ID) - levels(tacsat$HL_ID) <- 0: (length(levels(tacsat$HL_ID))-1) # rename the id for increasing numbers from 0 - tacsat$HL_ID <- as.character(tacsat$HL_ID) - # then assign a unique id - idx <- tacsat$HL_ID!=0 - tacsat[idx, "HL_ID"] <- paste( - tacsat$VE_REF[idx], "_", - tacsat$LE_GEAR[idx], "_", - tacsat$HL_ID[idx], - sep="") - tacsat$SI_STATE <- tacsat$SI_STATE2 - tacsat <- tacsat[, !colnames(tacsat) %in% c('SS_ID', 'SI_STATE2')] # remove useless column - return(tacsat) - } - -# label fishing sequecnes with a unique identifier (method based on SI_STATE) -#tacsat <- labellingHauls(tacsat) - - - +# assign an identifier in'HL_ID' to each of the fishing sequences +# (based on SI_STATE, assuming the "h", "f", "s" coding) +# (useful to count them in a grid...) + + +#' Labelling fishing operations +#' +#' Labelling fishing sequences from the SI_STATE coding with an unique +#' identifier +#' +#' +#' @param tacsat a tacsat format with the SI_STATE coding informed for "h", "f" +#' and "s" +#' @return add a new column to the tacsat data.frame named HL_ID +#' @author F. 
Bastardie +#' @seealso \code{\link{activityTacsat}} +#' @examples +#' +#' data(tacsat) +#' tacsat$SI_STATE <- 0 +#' tacsat$SI_SP <- replace(tacsat$SI_SP, is.na(tacsat$SI_SP), 0) +#' tacsat[tacsat$SI_SP >= 1.5 & tacsat$SI_SP <= 7.5,'SI_STATE'] <- 'f' +#' tacsat[tacsat$SI_SP < 1.5,'SI_STATE'] <- 'h' +#' tacsat[tacsat$SI_SP > 7.5,'SI_STATE'] <- 's' +#' labellingHauls(tacsat) +#' +#' @export labellingHauls +labellingHauls <- function(tacsat){ + tacsat$SI_STATE2 <- tacsat$SI_STATE + tacsat$SI_STATE <- as.character(tacsat$SI_STATE) + tacsat[is.na(tacsat$SI_STATE), 'SI_STATE'] <- '1' # assign steaming + tacsat[tacsat$SI_STATE!='f', 'SI_STATE'] <- '1' # assign steaming + tacsat[tacsat$SI_STATE=='f', 'SI_STATE'] <- '2' # assign fishing + tacsat$SI_STATE <- as.numeric(tacsat$SI_STATE) + tacsat$HL_ID <- c(0, diff(tacsat$SI_STATE)) # init + tacsat$HL_ID <- cumsum(tacsat$HL_ID) # fishing sequences detected. + tacsat$SS_ID <- 1- tacsat$HL_ID # steaming sequences detected. + tacsat$HL_ID <- cumsum(tacsat$SS_ID ) # fishing sequences labelled. 
+ tacsat[tacsat$SI_STATE==1, 'HL_ID'] <- 0 # correct label 0 for steaming + tacsat$HL_ID <- factor(tacsat$HL_ID) + levels(tacsat$HL_ID) <- 0: (length(levels(tacsat$HL_ID))-1) # rename the id for increasing numbers from 0 + tacsat$HL_ID <- as.character(tacsat$HL_ID) + # then assign a unique id + idx <- tacsat$HL_ID!=0 + tacsat[idx, "HL_ID"] <- paste( + tacsat$VE_REF[idx], "_", + tacsat$LE_GEAR[idx], "_", + tacsat$HL_ID[idx], + sep="") + tacsat$SI_STATE <- tacsat$SI_STATE2 + tacsat <- tacsat[, !colnames(tacsat) %in% c('SS_ID', 'SI_STATE2')] # remove useless column + return(tacsat) + } + +# label fishing sequecnes with a unique identifier (method based on SI_STATE) +#tacsat <- labellingHauls(tacsat) + + + diff --git a/vmstools/R/landingsMaps2GIFanim.r b/vmstools/R/landingsMaps2GIFanim.r index 216f726..daf4217 100644 --- a/vmstools/R/landingsMaps2GIFanim.r +++ b/vmstools/R/landingsMaps2GIFanim.r @@ -1,31 +1,74 @@ - # author F.Bastardie - # automatic creation of .gif animation using library(animation) - # need to install the ImageMagik freeware -landingsMaps2GIFanim <- function(idir = file.path("C:","VMSanalysis","FemernBelt","jpegLandings"), - spp=c("COD","SPR","HER","PLE","FLE","DAB","WHB")){ -require(animation) -for(sp in spp){ - for(met in list.files(file.path(idir, sp))){ - for(what in c("weight","value")){ - setwd(file.path(idir, sp, met,what,"quarter")) - filename <- paste("map_landings_",what,"_merged_vessels_",sp,"_",met,"_",sep='') - cat(paste(filename,"\n")) - wildcard = paste(filename, "*.", "jpeg", sep = "") - im.convert(wildcard, output = file.path(getwd(), paste("anim",".gif", sep='')), # library(animation) - convert = "convert", - cmd.fun = system, clean = FALSE) - } # end what - } # end met - } # end sp -return() -} - - # pings2LandingMaps (all.merged=all.merged, sp="LE_EURO_COD", output= file.path("C:","VMSanalysis", "FemernBelt"), - # cellsizeX =0.05, cellsizeY =0.05, we=9.8, ea=12.7, no=55.2, so=54.0, # fehmarn Belt area - # breaks0= c(0,100, 
100*(2^1),100*(2^2),100*(2^3),100*(2^4),100*(2^5),100*(2^6), 100*(2^7),100*(2^8),100*(2^9), 10000000) - # ) - # pings2LandingMaps (all.merged=all.merged, sp="LE_KG_COD", output= file.path("C:","VMSanalysis", "FemernBelt"), - # cellsizeX =0.05, cellsizeY =0.05, we=9.8, ea=12.7, no=55.2, so=54.0, # fehmarn Belt area - # breaks0= c(0,100, 100*(2^1),100*(2^2),100*(2^3),100*(2^4),100*(2^5),100*(2^6), 100*(2^7),100*(2^8),100*(2^9), 10000000) - # ) - # landingsMaps2GIFanim(idir = file.path("C:","VMSanalysis","FemernBelt","jpegLandings"), spp=c("COD","SPR") ) \ No newline at end of file + # author F.Bastardie + # automatic creation of .gif animation using library(animation) + # need to install the ImageMagick freeware + + +#' generate some gif animations from the landings maps +#' +#' generate some gif animations from the landings maps after having generated +#' the landings maps from the merged table per year, per metier (level6), per +#' metier-quarter all together +#' +#' Using some facilities provided by the 'animation' R package. Need ImageMagick +#' installed. +#' +#' @param idir the full path of the jpegLandings folder +#' @param spp vector of FAO codes for species +#' @author Francois Bastardie +#' @examples +#' +#' +#' \dontrun{ +#' +#' +#' for(a.year in as.character(2005:2009)){ +#' +#' # ...and load the merged output table for all vessels +#' load(file.path("C:","output",paste("all_merged__",a.year,".RData",sep=''))) +#' +#' # generate the maps +#' pings2LandingsMaps (all.merged=all.merged, sp="LE_EURO_COD", +#' output= file.path("C:","VMSanalysis", "FemernBelt"), +#' cellsizeX =0.05, cellsizeY =0.05, we=9.8, ea=12.7, no=55.2, so=54.0, +#' breaks0= c(0,100, 100*(2^1),100*(2^2),100*(2^3),100*(2^4), +#' 100*(2^5),100*(2^6), 100*(2^7),100*(2^8),100*(2^9), 10000000)) +#' } +#' +#' # create some animations... 
+#' # (need the 'animation' R package and ImageMagik installed) +#' landingsMaps2GIFanim(idir = file.path("C:","VMSanalysis","FemernBelt", +#' "jpegLandings"), spp=c("COD") ) +#' +#' +#' } +#' +#' +#' @export landingsMaps2GIFanim +landingsMaps2GIFanim <- function(idir = file.path("C:","VMSanalysis","FemernBelt","jpegLandings"), + spp=c("COD","SPR","HER","PLE","FLE","DAB","WHB")){ +require(animation) +for(sp in spp){ + for(met in list.files(file.path(idir, sp))){ + for(what in c("weight","value")){ + setwd(file.path(idir, sp, met,what,"quarter")) + filename <- paste("map_landings_",what,"_merged_vessels_",sp,"_",met,"_",sep='') + cat(paste(filename,"\n")) + wildcard = paste(filename, "*.", "jpeg", sep = "") + im.convert(wildcard, output = file.path(getwd(), paste("anim",".gif", sep='')), # library(animation) + convert = "convert", + cmd.fun = system, clean = FALSE) + } # end what + } # end met + } # end sp +return() +} + + # pings2LandingMaps (all.merged=all.merged, sp="LE_EURO_COD", output= file.path("C:","VMSanalysis", "FemernBelt"), + # cellsizeX =0.05, cellsizeY =0.05, we=9.8, ea=12.7, no=55.2, so=54.0, # fehmarn Belt area + # breaks0= c(0,100, 100*(2^1),100*(2^2),100*(2^3),100*(2^4),100*(2^5),100*(2^6), 100*(2^7),100*(2^8),100*(2^9), 10000000) + # ) + # pings2LandingMaps (all.merged=all.merged, sp="LE_KG_COD", output= file.path("C:","VMSanalysis", "FemernBelt"), + # cellsizeX =0.05, cellsizeY =0.05, we=9.8, ea=12.7, no=55.2, so=54.0, # fehmarn Belt area + # breaks0= c(0,100, 100*(2^1),100*(2^2),100*(2^3),100*(2^4),100*(2^5),100*(2^6), 100*(2^7),100*(2^8),100*(2^9), 10000000) + # ) + # landingsMaps2GIFanim(idir = file.path("C:","VMSanalysis","FemernBelt","jpegLandings"), spp=c("COD","SPR") ) diff --git a/vmstools/R/lonLat2SpatialPolygons.r b/vmstools/R/lonLat2SpatialPolygons.r index f60ac4a..24d1c98 100644 --- a/vmstools/R/lonLat2SpatialPolygons.r +++ b/vmstools/R/lonLat2SpatialPolygons.r @@ -1,95 +1,124 @@ -lonLat2SpatialPolygons <- 
function(SI_LONG=NULL,SI_LATI=NULL,lst=NULL){ - if((is.null(SI_LONG)==TRUE | is.null(SI_LATI)==TRUE) & is.null(lst)==TRUE) stop("Specify either longitude and latitude vectors or a list containing a dataframe with specified longitude and latitude vectors") - - #- Return spatial polygons object when SI_LONG and SI_LATI vectors are supplied - if(is.null(SI_LONG)==FALSE | is.null(SI_LATI)==FALSE){ - if(length(SI_LONG) != length(SI_LATI)) stop("Length SI_LONG not equal to length SI_LATI") - brks <- which(is.na(SI_LONG)==TRUE | is.na(SI_LATI) == TRUE) - storePols <- list() - #- If no breaks - if(length(brks)==0){ - if((SI_LONG[1] != SI_LONG[length(SI_LONG)]) | (SI_LATI[1] != SI_LATI[length(SI_LATI)])){ - coords <- coordinates(rbind(cbind(SI_LONG,SI_LATI), - cbind(SI_LONG[1],SI_LATI[1]))) - } else { - coords <- coordinates(cbind(SI_LONG,SI_LATI)) - } - pol <- Polygon(coords) - pols <- Polygons(list(pol),ID="1") - storePols[[1]] <- pols - } else { - #- If breaks - for(i in 1:length(brks)){ - if(i == 1){ - if((SI_LONG[1] != SI_LONG[(brks[i]-1)]) | (SI_LATI[1] != SI_LATI[(brks[i]-1)])){ - coords <- coordinates(rbind(cbind(SI_LONG[1:(brks[i]-1)],SI_LATI[1:(brks[i]-1)]), - cbind(SI_LONG[1],SI_LATI[1]))) - } else { - coords <- coordinates(cbind(SI_LONG[1:(brks[i]-1)],SI_LATI[1:(brks[i]-1)])) - } - } else { - if((SI_LONG[(brks[i-1]+1)] != SI_LONG[(brks[i]-1)]) | (SI_LATI[(brks[i-1]+1)] != SI_LATI[(brks[i]-1)])){ - coords <- coordinates(rbind(cbind(SI_LONG[(brks[i-1]+1):(brks[i]-1)],SI_LATI[(brks[i-1]+1):(brks[i]-1)]), - cbind(SI_LONG[(brks[i-1]+1)],SI_LATI[(brks[i-1]+1)]))) - } else { - coords <- coordinates(cbind(SI_LONG[(brks[i-1]+1):(brks[i]-1)],SI_LATI[(brks[i-1]+1):(brks[i]-1)])) - } - } - pol <- Polygon(coords) - pols <- Polygons(list(pol),ID=i) - storePols[[i]] <- pols - } - } - spol <- SpatialPolygons(storePols) - } - #- Return spatial polygons object when list with SI_LONG,SI_LATI dataframe is given - if(is.null(lst)==FALSE){ - if(is.null(names(lst))==TRUE) names(lst) 
<- 1:length(lst) - storePols <- list() - counter <- 1 - for(j in 1:length(lst)){ - brks <- which(is.na(lst[[j]]$SI_LONG)==TRUE | is.na(lst[[j]]$SI_LATI) == TRUE) - #- If no breaks - if(length(brks)==0){ - if((lst[[j]]$SI_LONG[1] != lst[[j]]$SI_LONG[nrow(lst[[j]])]) | (lst[[j]]$SI_LATI[1] != lst[[j]]$SI_LATI[nrow(lst[[j]])])){ - coords <- coordinates(rbind(cbind(lst[[j]]$SI_LONG,lst[[j]]$SI_LATI), - cbind(lst[[j]]$SI_LONG[1],lst[[j]]$SI_LATI[1]))) - } else { - coords <- coordinates(cbind(lst[[j]]$SI_LONG,lst[[j]]$SI_LATI)) - } - pol <- Polygon(coords) - pols <- Polygons(list(pol),ID=counter) - storePols[[counter]] <- pols - counter <- counter+1 - } else { - #- If breaks - if(length(which(diff(brks)==1)>0)) stop("Only 1 row of NA is allowed in between values of SI_LONG and SI_LATI") - for(i in 1:length(brks)){ - if(i == 1){ - if((lst[[j]]$SI_LONG[1] != lst[[j]]$SI_LONG[(brks[i]-1)]) | (lst[[j]]$SI_LATI[1] != lst[[j]]$SI_LATI[(brks[i]-1)])){ - coords <- coordinates(rbind(cbind(lst[[j]]$SI_LONG[1:(brks[i]-1)],lst[[j]]$SI_LATI[1:(brks[i]-1)]), - cbind(lst[[j]]$SI_LONG[1],lst[[j]]$SI_LATI[1]))) - } else { - coords <- coordinates(cbind(lst[[j]]$SI_LONG[1:(brks[i]-1)],lst[[j]]$SI_LATI[1:(brks[i]-1)])) - } - } else { - if((lst[[j]]$SI_LONG[(brks[i-1]+1)] != lst[[j]]$SI_LONG[(brks[i]-1)]) | (lst[[j]]$SI_LATI[(brks[i-1]+1)] != lst[[j]]$SI_LATI[(brks[i]-1)])){ - coords <- coordinates(rbind(cbind(lst[[j]]$SI_LONG[(brks[i-1]+1):(brks[i]-1)],lst[[j]]$SI_LATI[(brks[i-1]+1):(brks[i]-1)]), - cbind(lst[[j]]$SI_LONG[(brks[i-1]+1)],lst[[j]]$SI_LATI[(brks[i-1]+1)]))) - } else { - coords <- coordinates(cbind(lst[[j]]$SI_LONG[(brks[i-1]+1):(brks[i]-1)],lst[[j]]$SI_LATI[(brks[i-1]+1):(brks[i]-1)])) - } - } - pol <- Polygon(coords) - pols <- Polygons(list(pol),ID=counter) - storePols[[counter]] <- pols - counter <- counter + 1 - } - } - } - #- Create spatial polygons - spol <- SpatialPolygons(storePols) - } - return(spol)} - - \ No newline at end of file +#' Creates a 'SpatialPolygons' 
object from longitude-latitude dataset +#' +#' Takes one set of longitude-latitude or a list of lon-lat objects and +#' converts it into one SpatialPolygons object. +#' +#' If the longitude and latitude set do not form a closed polygon, the function +#' will close it for you. +#' +#' @param SI_LONG Set with longitude values +#' @param SI_LATI Set with latitude values +#' @param lst list of objects, each with longitude and latitude values +#' @return Returns a 'SpatialPolygons' set from the package 'sp' +#' @note May take a while when many polygons need to be created +#' @author Niels T. Hintzen +#' @seealso \code{\link{SpatialPolygons}} +#' @examples +#' +#' data(europa) +#' +#' eurPols <- lonLat2SpatialPolygons(lst=lapply(as.list(sort(unique(europa$SID))), +#' function(x){data.frame( +#' SI_LONG=subset(europa,SID==x)$X, +#' SI_LATI=subset(europa,SID==x)$Y)})) +#' +#' area <- lonLat2SpatialPolygons(SI_LONG=c(2,2.5,2.7,2.1),SI_LATI=c(54,54.2,55.8,55.6)) +#' plot(eurPols,col="green",xlim=c(-4,10),ylim=c(48,62)); axis(1);axis(2);box() +#' plot(area,add=TRUE,col="red") +#' +#' @export lonLat2SpatialPolygons +lonLat2SpatialPolygons <- function(SI_LONG=NULL,SI_LATI=NULL,lst=NULL){ + if((is.null(SI_LONG)==TRUE | is.null(SI_LATI)==TRUE) & is.null(lst)==TRUE) stop("Specify either longitude and latitude vectors or a list containing a dataframe with specified longitude and latitude vectors") + + #- Return spatial polygons object when SI_LONG and SI_LATI vectors are supplied + if(is.null(SI_LONG)==FALSE | is.null(SI_LATI)==FALSE){ + if(length(SI_LONG) != length(SI_LATI)) stop("Length SI_LONG not equal to length SI_LATI") + brks <- which(is.na(SI_LONG)==TRUE | is.na(SI_LATI) == TRUE) + storePols <- list() + #- If no breaks + if(length(brks)==0){ + if((SI_LONG[1] != SI_LONG[length(SI_LONG)]) | (SI_LATI[1] != SI_LATI[length(SI_LATI)])){ + coords <- coordinates(rbind(cbind(SI_LONG,SI_LATI), + cbind(SI_LONG[1],SI_LATI[1]))) + } else { + coords <- coordinates(cbind(SI_LONG,SI_LATI)) + } 
+ pol <- Polygon(coords) + pols <- Polygons(list(pol),ID="1") + storePols[[1]] <- pols + } else { + #- If breaks + for(i in 1:length(brks)){ + if(i == 1){ + if((SI_LONG[1] != SI_LONG[(brks[i]-1)]) | (SI_LATI[1] != SI_LATI[(brks[i]-1)])){ + coords <- coordinates(rbind(cbind(SI_LONG[1:(brks[i]-1)],SI_LATI[1:(brks[i]-1)]), + cbind(SI_LONG[1],SI_LATI[1]))) + } else { + coords <- coordinates(cbind(SI_LONG[1:(brks[i]-1)],SI_LATI[1:(brks[i]-1)])) + } + } else { + if((SI_LONG[(brks[i-1]+1)] != SI_LONG[(brks[i]-1)]) | (SI_LATI[(brks[i-1]+1)] != SI_LATI[(brks[i]-1)])){ + coords <- coordinates(rbind(cbind(SI_LONG[(brks[i-1]+1):(brks[i]-1)],SI_LATI[(brks[i-1]+1):(brks[i]-1)]), + cbind(SI_LONG[(brks[i-1]+1)],SI_LATI[(brks[i-1]+1)]))) + } else { + coords <- coordinates(cbind(SI_LONG[(brks[i-1]+1):(brks[i]-1)],SI_LATI[(brks[i-1]+1):(brks[i]-1)])) + } + } + pol <- Polygon(coords) + pols <- Polygons(list(pol),ID=i) + storePols[[i]] <- pols + } + } + spol <- SpatialPolygons(storePols) + } + #- Return spatial polygons object when list with SI_LONG,SI_LATI dataframe is given + if(is.null(lst)==FALSE){ + if(is.null(names(lst))==TRUE) names(lst) <- 1:length(lst) + storePols <- list() + counter <- 1 + for(j in 1:length(lst)){ + brks <- which(is.na(lst[[j]]$SI_LONG)==TRUE | is.na(lst[[j]]$SI_LATI) == TRUE) + #- If no breaks + if(length(brks)==0){ + if((lst[[j]]$SI_LONG[1] != lst[[j]]$SI_LONG[nrow(lst[[j]])]) | (lst[[j]]$SI_LATI[1] != lst[[j]]$SI_LATI[nrow(lst[[j]])])){ + coords <- coordinates(rbind(cbind(lst[[j]]$SI_LONG,lst[[j]]$SI_LATI), + cbind(lst[[j]]$SI_LONG[1],lst[[j]]$SI_LATI[1]))) + } else { + coords <- coordinates(cbind(lst[[j]]$SI_LONG,lst[[j]]$SI_LATI)) + } + pol <- Polygon(coords) + pols <- Polygons(list(pol),ID=counter) + storePols[[counter]] <- pols + counter <- counter+1 + } else { + #- If breaks + if(length(which(diff(brks)==1)>0)) stop("Only 1 row of NA is allowed in between values of SI_LONG and SI_LATI") + for(i in 1:length(brks)){ + if(i == 1){ + 
if((lst[[j]]$SI_LONG[1] != lst[[j]]$SI_LONG[(brks[i]-1)]) | (lst[[j]]$SI_LATI[1] != lst[[j]]$SI_LATI[(brks[i]-1)])){ + coords <- coordinates(rbind(cbind(lst[[j]]$SI_LONG[1:(brks[i]-1)],lst[[j]]$SI_LATI[1:(brks[i]-1)]), + cbind(lst[[j]]$SI_LONG[1],lst[[j]]$SI_LATI[1]))) + } else { + coords <- coordinates(cbind(lst[[j]]$SI_LONG[1:(brks[i]-1)],lst[[j]]$SI_LATI[1:(brks[i]-1)])) + } + } else { + if((lst[[j]]$SI_LONG[(brks[i-1]+1)] != lst[[j]]$SI_LONG[(brks[i]-1)]) | (lst[[j]]$SI_LATI[(brks[i-1]+1)] != lst[[j]]$SI_LATI[(brks[i]-1)])){ + coords <- coordinates(rbind(cbind(lst[[j]]$SI_LONG[(brks[i-1]+1):(brks[i]-1)],lst[[j]]$SI_LATI[(brks[i-1]+1):(brks[i]-1)]), + cbind(lst[[j]]$SI_LONG[(brks[i-1]+1)],lst[[j]]$SI_LATI[(brks[i-1]+1)]))) + } else { + coords <- coordinates(cbind(lst[[j]]$SI_LONG[(brks[i-1]+1):(brks[i]-1)],lst[[j]]$SI_LATI[(brks[i-1]+1):(brks[i]-1)])) + } + } + pol <- Polygon(coords) + pols <- Polygons(list(pol),ID=counter) + storePols[[counter]] <- pols + counter <- counter + 1 + } + } + } + #- Create spatial polygons + spol <- SpatialPolygons(storePols) + } + return(spol)} + + diff --git a/vmstools/R/lonLatRatio.r b/vmstools/R/lonLatRatio.r index 26322d5..7949d10 100644 --- a/vmstools/R/lonLatRatio.r +++ b/vmstools/R/lonLatRatio.r @@ -1,10 +1,35 @@ -`lonLatRatio` <- - function(x1,lat){ - #Based on the Haversine formula - #At the position, the y-position remains the same, hence, cos(lat)*cos(lat) instead of cos(lat) * cos(y2) - a <- cos(lat*pi/180)*cos(lat*pi/180)*sin((0.1*pi/180)/2)*sin((0.1*pi/180)/2); - c <- 2*atan2(sqrt(a),sqrt(1-a)); - R <- 6371; - dx1 <- R*c - - return(c(dx1/11.12))} +#' Calculate the ratio between 1 degree in longitude versus 1 degree in +#' latitude +#' +#' The distance in Km on the longitude direction changes along the latitude +#' direction. This function computes the ratio between 1 degree in the +#' longitude direction depending on the latitude of the GPS position. 
Returns +#' the ratios of two GPS locations (two succeeding VMS datapoints). Can be +#' used with 1 GPS position too, returning NA for the second value. +#' +#' +#' @param lon Longitude of the two GPS positions +#' @param lat Latitude of the two GPS positions +#' @note Computation is approximation based on the Haversine formula +#' @author Niels T. Hintzen +#' @seealso \code{\link{distance}}, \code{\link{degree2Km}}, +#' \code{\link{km2Degree}} +#' @references EU lot 2 project +#' @examples +#' +#' lon <- -4 +#' lat <- 50 +#' +#' lonLatRatio(lon,lat) +#' +#' @export lonLatRatio +`lonLatRatio` <- + function(x1,lat){ + #Based on the Haversine formula + #At the position, the y-position remains the same, hence, cos(lat)*cos(lat) instead of cos(lat) * cos(y2) + a <- cos(lat*pi/180)*cos(lat*pi/180)*sin((0.1*pi/180)/2)*sin((0.1*pi/180)/2); + c <- 2*atan2(sqrt(a),sqrt(1-a)); + R <- 6371; + dx1 <- R*c + + return(c(dx1/11.12))} diff --git a/vmstools/R/mapGrid.r b/vmstools/R/mapGrid.r index b95e0bf..e813a14 100644 --- a/vmstools/R/mapGrid.r +++ b/vmstools/R/mapGrid.r @@ -1,123 +1,173 @@ -#mapGrid.r -#andy south 12/2/09 - -#to map grids input as SGDF -#** OR maybe just provide a wrapper to mapGriddedData instead ? -#would need to add a better worldmap into that - -mapGrid <- function( sGDF - , sPDF - , we="" - , ea="" - , so="" - , no="" - , gridValName="fishing" - , plotTitle = "" - , numCats = 5 - , paletteCats = "heat.colors" - , addLegend = TRUE - , legendx='bottomleft' - , legendncol = 1 - , legendtitle = "fishing activity" - , plotPoints = FALSE - , colPoints =1 - , legPoints = FALSE - , colLand = 'sienna' - , addICESgrid = FALSE - , addScale = TRUE - , outGridFile = "" #name for output gridAscii - , outPlot = "" #name for output png - , ... ) -{ - -require(sp) -require(maptools) - -par(mar=c(4,6,1,1)) - -xlim0=c(we,ea) -ylim0=c(so,no) - -lstargs <- list(...) 
- -#dev.new() -if(length(lstargs$breaks0)==0) { - breaks0 <- pretty(sGDF[[gridValName]],n=numCats) - } else{ - breaks0 <- lstargs$breaks0 - } - -# rainbow, heat.colors, etc. -cols <- rev(do.call(paletteCats, list(length(breaks0)-1))) - -require(mapdata) -#windows(8,7) # what is this for ? -map("worldHires", add=FALSE,col=colLand,fill=TRUE, bg="white", xlim=xlim0 + c(+0.1,-0.1), ylim=ylim0 + c(+0.1,-0.1), -regions=c('uk','ireland','france','germany','netherlands', 'norway','belgium', -'spain','luxembourg','denmark', 'sweden','iceland', 'portugal','italy','sicily','ussr','sardinia','albania','monaco','turkey','austria', -'switzerland','czechoslovakia','finland','libya', 'hungary','yugoslavia','poland','greece','romania','bulgaria', 'slovakia','morocco', -'tunisia','algeria','egypt' ), mar=c(2,6,2,2)) - - im <- as.image.SpatialGridDataFrame(sGDF, attr=2) - image(im$x,im$y,im$z, axes=FALSE, col=cols, breaks = breaks0, - xlim=xlim0 , ylim=ylim0, add=TRUE ) - -#add ICES rectangles -if(addICESgrid){ - for(i in seq(-15,50, by=1)) abline(v=i) - for(i in seq(0, 75, by=0.5)) abline(h=i) - } - -map("worldHires", add=TRUE, col=colLand, fill=TRUE, bg="white", xlim=xlim0 , ylim=ylim0 , -regions=c('uk','ireland','france','germany','netherlands', 'norway','belgium', -'spain','luxembourg','denmark', 'sweden','iceland', 'portugal','italy','sicily','ussr','sardinia','albania','monaco','turkey','austria', -'switzerland','czechoslovakia','finland','libya', 'hungary','yugoslavia','poland','greece','romania','bulgaria', 'slovakia','morocco', -'tunisia','algeria','egypt' )) - - -box() # to put a box around the plot -#mtext(paste(gear,year),font=4,line=-1.5) -axis(1) -axis(2, las=2) - if(we>0){ - mtext("Degree East", side=1, line=2) - } else{ - mtext("Degree West", side=1, line=2) - } - if(no>0){ - mtext("Degree North", side=2, line=3) - } else{ - mtext("Degree South", side=2, line=3) - } - - - -# add a scale -if(addScale) map.scale(x=xlim0[2]-(xlim0[2]-xlim0[1])/2, y=ylim0[1], ratio=FALSE) 
- -#to add points (can obscure grid) -if (plotPoints) { - if(length(colPoints)!=1) { - colPoints <- factor(colPoints) - a.legPoints <- levels(colPoints) - levels(colPoints) <- colors()[(1:length(levels(colPoints))) *10] - points(sPDF, pch=16, col=as.character(colPoints),cex=0.5) - - if(length(legPoints)!=0){ - legend(x='bottomright', legend=a.legPoints, pch = 16, col=levels(colPoints), title="", ncol=2, bg="white", pt.cex=1) - } - }else{ - points(sPDF, pch='.', col=colPoints,cex=0.1) - } - } - -#legend(x='bottomleft', legend=breaks[1:(length(breaks)-1)], pch = 22, pt.bg=cols, title="fishing activity",bg="white",pt.cex=2 ) -legend(x=legendx, legend=breaks0[1:(length(breaks0)-1)], pch = 22, pt.bg=cols, title=legendtitle, ncol=legendncol, bg="white",pt.cex=2 ) - - -#to add plotTitle -if (plotTitle != "") mtext(plotTitle) - - - -} #end of mapGrid \ No newline at end of file +#mapGrid.r +#andy south 12/2/09 + +#to map grids input as SGDF +#** OR maybe just provide a wrapper to mapGriddedData instead ? +#would need to add a better worldmap into that + + + +#' function to map grids +#' +#' Accepts an input of a \code{SpatialGridDataFrame} Plots a map of the grid +#' and optionally outputs to a gridAscii file and/or an image. 
+#' +#' +#' @param sGDF a \code{SpatialGridDataFrame} +#' @param sPDF an optional \code{SpatialPointsDataFrame} plotted if +#' \code{plotPoints=TRUE} +#' @param we western bounds of the area to plot, if not specified taken from +#' the \code{sGDF} +#' @param ea eastern bounds of the area to plot, if not specified taken from +#' the \code{sGDF} +#' @param so southern bounds of the area to plot, if not specified taken from +#' the \code{sGDF} +#' @param no northern bounds of the area to plot, if not specified taken from +#' the \code{sGDF} +#' @param gridValName the name of the attribute column to plot from the +#' \code{SpatialGridDataFrame} +#' @param plotTitle optional title to add to the plot +#' @param numCats how many categories to classify grid values into for map plot +#' (uses \code{pretty()} classification) +#' @param paletteCats color palette to use +#' @param addLegend whether to add a legend to the plot +#' @param legendx position of legend should be one of 'bottomright', 'bottom', +#' 'bottomleft', 'left', 'topleft', 'top', 'topright', 'right', 'center' +#' @param legendncol number of columns in the legend +#' @param legendtitle legend title +#' @param plotPoints whether to add the original points to the plot +#' @param colPoints color of points to plot +#' @param legPoints Logical. Points in legend +#' @param colLand color of land +#' @param addICESgrid Logical. Adding ICES grid on top +#' @param addScale Logical. 
Adding axes +#' @param outGridFile optional name for a gridAscii file to be created from the +#' grid +#' @param outPlot optional name for a png file to be created from the plot +#' @param \dots NOT used yet +#' @author Andy South +#' @seealso \code{vmsGridCreate()} +#' @references EU Lot 2 project +#' @examples +#' +#' #mapGrid(dF, nameLon = "POS_LONGITUDE", nameLat = "POS_LATITUDE", +#' # cellsizeX = 0.5, cellsizeY = 0.5,legendx='bottomright', +#' # plotPoints=TRUE ) +#' +#' @export mapGrid +mapGrid <- function( sGDF + , sPDF + , we="" + , ea="" + , so="" + , no="" + , gridValName="fishing" + , plotTitle = "" + , numCats = 5 + , paletteCats = "heat.colors" + , addLegend = TRUE + , legendx='bottomleft' + , legendncol = 1 + , legendtitle = "fishing activity" + , plotPoints = FALSE + , colPoints =1 + , legPoints = FALSE + , colLand = 'sienna' + , addICESgrid = FALSE + , addScale = TRUE + , outGridFile = "" #name for output gridAscii + , outPlot = "" #name for output png + , ... ) +{ + +require(sp) +require(maptools) + +par(mar=c(4,6,1,1)) + +xlim0=c(we,ea) +ylim0=c(so,no) + +lstargs <- list(...) + +#dev.new() +if(length(lstargs$breaks0)==0) { + breaks0 <- pretty(sGDF[[gridValName]],n=numCats) + } else{ + breaks0 <- lstargs$breaks0 + } + +# rainbow, heat.colors, etc. +cols <- rev(do.call(paletteCats, list(length(breaks0)-1))) + +require(mapdata) +#windows(8,7) # what is this for ? 
+map("worldHires", add=FALSE,col=colLand,fill=TRUE, bg="white", xlim=xlim0 + c(+0.1,-0.1), ylim=ylim0 + c(+0.1,-0.1), +regions=c('uk','ireland','france','germany','netherlands', 'norway','belgium', +'spain','luxembourg','denmark', 'sweden','iceland', 'portugal','italy','sicily','ussr','sardinia','albania','monaco','turkey','austria', +'switzerland','czechoslovakia','finland','libya', 'hungary','yugoslavia','poland','greece','romania','bulgaria', 'slovakia','morocco', +'tunisia','algeria','egypt' ), mar=c(2,6,2,2)) + + im <- as.image.SpatialGridDataFrame(sGDF, attr=2) + image(im$x,im$y,im$z, axes=FALSE, col=cols, breaks = breaks0, + xlim=xlim0 , ylim=ylim0, add=TRUE ) + +#add ICES rectangles +if(addICESgrid){ + for(i in seq(-15,50, by=1)) abline(v=i) + for(i in seq(0, 75, by=0.5)) abline(h=i) + } + +map("worldHires", add=TRUE, col=colLand, fill=TRUE, bg="white", xlim=xlim0 , ylim=ylim0 , +regions=c('uk','ireland','france','germany','netherlands', 'norway','belgium', +'spain','luxembourg','denmark', 'sweden','iceland', 'portugal','italy','sicily','ussr','sardinia','albania','monaco','turkey','austria', +'switzerland','czechoslovakia','finland','libya', 'hungary','yugoslavia','poland','greece','romania','bulgaria', 'slovakia','morocco', +'tunisia','algeria','egypt' )) + + +box() # to put a box around the plot +#mtext(paste(gear,year),font=4,line=-1.5) +axis(1) +axis(2, las=2) + if(we>0){ + mtext("Degree East", side=1, line=2) + } else{ + mtext("Degree West", side=1, line=2) + } + if(no>0){ + mtext("Degree North", side=2, line=3) + } else{ + mtext("Degree South", side=2, line=3) + } + + + +# add a scale +if(addScale) map.scale(x=xlim0[2]-(xlim0[2]-xlim0[1])/2, y=ylim0[1], ratio=FALSE) + +#to add points (can obscure grid) +if (plotPoints) { + if(length(colPoints)!=1) { + colPoints <- factor(colPoints) + a.legPoints <- levels(colPoints) + levels(colPoints) <- colors()[(1:length(levels(colPoints))) *10] + points(sPDF, pch=16, col=as.character(colPoints),cex=0.5) + + 
if(length(legPoints)!=0){ + legend(x='bottomright', legend=a.legPoints, pch = 16, col=levels(colPoints), title="", ncol=2, bg="white", pt.cex=1) + } + }else{ + points(sPDF, pch='.', col=colPoints,cex=0.1) + } + } + +#legend(x='bottomleft', legend=breaks[1:(length(breaks)-1)], pch = 22, pt.bg=cols, title="fishing activity",bg="white",pt.cex=2 ) +legend(x=legendx, legend=breaks0[1:(length(breaks0)-1)], pch = 22, pt.bg=cols, title=legendtitle, ncol=legendncol, bg="white",pt.cex=2 ) + + +#to add plotTitle +if (plotTitle != "") mtext(plotTitle) + + + +} #end of mapGrid diff --git a/vmstools/R/maxRangeCI.r b/vmstools/R/maxRangeCI.r index 7e29653..c2722e1 100644 --- a/vmstools/R/maxRangeCI.r +++ b/vmstools/R/maxRangeCI.r @@ -1,53 +1,99 @@ - - -maxRangeCI <- function(x,y,Time,speed){ - - #Pre-Calculation to speed up the code - pi180 <- pi/180 - cosy1 <- cos(y[1]*pi180) - cosy2 <- cos(y[2]*pi180) - - #Calculate maximum distance in km - dmax <- Time/60*sum(speed,na.rm=TRUE)/2*1.852 - - #Calculate d from Haversine function - d <- distance(x[1],y[1],x[2],y[2]) - - #Calculate a and b as in Mills et al. 
2006 paper - warn<- 0 - if(d >= dmax){ - warning(paste("Distance too far to reach with current speed estimate ",round(x,3)," ",round(y,3),"\n")) - dmax <- d - warn <- 1 - } - a <- dmax/2 - b <- sqrt((dmax^2 - d^2)/4) - - if(d == 0){ - o <- 0 - } else { - dx <- (x[2] - x[1])*pi180 - dy <- (y[2] - y[1])*pi180 - o <- atan2(sin(dx)*cosy2,cosy1*sin(y[2]*pi180)-sin(y[1]*pi180)*cosy2*cos(dx)) - angles <- (o*(180/pi)) %% 360 - - angle2 <- ifelse(angles >= 0 & angles < 180, 90 - angles,270-angles) - o <- angle2*(pi180) - } - #See also: http://www.movable-type.co.uk/scripts/latlong.html - Bx <- cosy2*cos((x[2]-x[1])*pi180) - By <- cosy2*sin((x[2]-x[1])*pi180) - mid.x <- (x[1]*pi180) + atan2(By,cosy1+Bx) - mid.y <- atan2(sin(y[1]*pi180) + sin(y[2]*pi180),sqrt((cosy1+Bx)^2 + By^2)) - mid.x <- mid.x*180/pi - mid.y <- mid.y*180/pi - - a <- c(km2Degree(mid.x,mid.y,a),a/111.2) - b <- c(km2Degree(mid.x,mid.y,b),b/111.2) - - #See also Pfoser and Jensen 1999 Capturing the Uncertainty of Moving-Object representation - u <- 0:360*pi180 - xres <- mid.x + a[1] * cos(o) * cos(u) - b[1] * sin(o) * sin(u) - yres <- mid.y + a[2] * sin(o) * cos(u) + b[2] * cos(o) * sin(u) - - return(list(matrix(c(xres,yres),ncol=2),dmax,warn))} \ No newline at end of file +#' Computation of outer range of vessel trajectory between two succeeding +#' points +#' +#' A vessel can, based on an assumed speed and time interval, only travel a +#' certain maximum range between two succeeding VMS / GPS points. This outer +#' range can be describes as an ellipse surrounding these two points. The outer +#' region returned is a matrix of 360 x-y coordinates. As well, the maximum +#' distance able to travel depending on the speed and time interval is returned +#' as is a warning if the measured distance between the two succeeding points +#' is larger than the possible distance able to travel in the amount of time +#' and speed given, indicating that the vessel must have travelled faster than +#' the given speeds. 
+#' +#' A list is returned with in the first element the 360 x-y coordinates of the +#' outer region. In the second element, the maximum distance able to travel +#' given the speed and time interval, is returned. The third element holds a +#' warning if the maximum distance computed is exceeded. If warn equals 0, the +#' maximum distance is not exceeded. If warn equals 1, this distance is +#' exceeded. +#' +#' @param x Longitudes of the GPS positions +#' @param y Latitudes of the GPS positions +#' @param Time Time in minutes between the succeeding datapoints +#' @param speed Speeds at the GPS positions in nots +#' @note Used within the plotCIinterpolation() function. +#' @author Niels T. Hintzen +#' @seealso \code{\link{plotCIinterpolation}}, \code{\link{interpolateTacsat}}, +#' \code{\link{N1p0}}, \code{\link{plotInterpolation}} +#' @references Pfoser and Jensen 1999 Capturing the Uncertainty of +#' Moving-Object Representations, Mills et al. 2006 Estimating high resolution +#' trawl fishing effort from satellite-based vessel monitoring system data, +#' Hintzen et al. 
2010 Improved estimation of trawling tracks using cubic +#' Hermite spline interpolationof position registration data +#' @examples +#' +#' data(tacsat) +#' tacsat <- formatTacsat(tacsat) +#' #shorten the tacsat file for example use and add date-time combination +#' tacsat <- tacsat[1:10,] +#' lon <- tacsat$SI_LONG[c(3,4)] +#' lat <- tacsat$SI_LATI[c(3,4)] +#' #Calculate maximum range to be used within Confidence Interval calculation +#' timeDiff <- sum(an(unlist(strsplit(tacsat$SI_TIME[4],":"))) * c(60,1,1/60)) +#' - sum(an(unlist(strsplit(tacsat$SI_TIME[3],":"))) * c(60,1,1/60)) +#' res <- maxRangeCI(lon,lat,timeDiff,tacsat$SI_SP[c(3,4)]) +#' +#' +#' @export maxRangeCI +maxRangeCI <- function(x,y,Time,speed){ + + #Pre-Calculation to speed up the code + pi180 <- pi/180 + cosy1 <- cos(y[1]*pi180) + cosy2 <- cos(y[2]*pi180) + + #Calculate maximum distance in km + dmax <- Time/60*sum(speed,na.rm=TRUE)/2*1.852 + + #Calculate d from Haversine function + d <- distance(x[1],y[1],x[2],y[2]) + + #Calculate a and b as in Mills et al. 
2006 paper + warn<- 0 + if(d >= dmax){ + warning(paste("Distance too far to reach with current speed estimate ",round(x,3)," ",round(y,3),"\n")) + dmax <- d + warn <- 1 + } + a <- dmax/2 + b <- sqrt((dmax^2 - d^2)/4) + + if(d == 0){ + o <- 0 + } else { + dx <- (x[2] - x[1])*pi180 + dy <- (y[2] - y[1])*pi180 + o <- atan2(sin(dx)*cosy2,cosy1*sin(y[2]*pi180)-sin(y[1]*pi180)*cosy2*cos(dx)) + angles <- (o*(180/pi)) %% 360 + + angle2 <- ifelse(angles >= 0 & angles < 180, 90 - angles,270-angles) + o <- angle2*(pi180) + } + #See also: http://www.movable-type.co.uk/scripts/latlong.html + Bx <- cosy2*cos((x[2]-x[1])*pi180) + By <- cosy2*sin((x[2]-x[1])*pi180) + mid.x <- (x[1]*pi180) + atan2(By,cosy1+Bx) + mid.y <- atan2(sin(y[1]*pi180) + sin(y[2]*pi180),sqrt((cosy1+Bx)^2 + By^2)) + mid.x <- mid.x*180/pi + mid.y <- mid.y*180/pi + + a <- c(km2Degree(mid.x,mid.y,a),a/111.2) + b <- c(km2Degree(mid.x,mid.y,b),b/111.2) + + #See also Pfoser and Jensen 1999 Capturing the Uncertainty of Moving-Object representation + u <- 0:360*pi180 + xres <- mid.x + a[1] * cos(o) * cos(u) - b[1] * sin(o) * sin(u) + yres <- mid.y + a[2] * sin(o) * cos(u) + b[2] * cos(o) * sin(u) + + return(list(matrix(c(xres,yres),ncol=2),dmax,warn))} diff --git a/vmstools/R/mcp.area.r b/vmstools/R/mcp.area.r index 2d48bab..90b02ce 100644 --- a/vmstools/R/mcp.area.r +++ b/vmstools/R/mcp.area.r @@ -1,44 +1,58 @@ -#This is taken from gpclib package -mcp.area <- function(xy, id, percent = seq(20, 100, by = 5), - unin = c("m","km"), unout = c("ha", "km2", "m2"), plotit = TRUE){ - unin <- match.arg(unin) - unout <- match.arg(unout) - if (length(id) != nrow(xy)) - stop("xy and id should be of the same length") - xy <- xy[!is.na(xy[, 1]), ] - xy <- xy[!is.na(xy[, 2]), ] - id <- id[!is.na(xy[, 1])] - id <- id[!is.na(xy[, 2])] - lev <- percent - res <- list() - ar <- matrix(0, nrow = length(lev), ncol = nlevels(factor(id))) - lixy <- split(xy, id) - le <- names(lixy) - for (i in 1:length(lev)) { - ar[i, ] <- 
unlist(lapply(lixy, function(z) { - res <- mcp(z, rep(1, nrow(z)), percent = lev[i]) - class(res) <- "data.frame" - return(area.poly(as(res[, 2:3], "gpc.poly"))) - })) - } - ar <- as.data.frame(ar) - names(ar) <- le - if (unin == "m") { - if (unout == "ha") - ar <- ar/10000 - if (unout == "km2") - ar <- ar/1e+06 - } - if (unin == "km") { - if (unout == "ha") - ar <- ar * 100 - if (unout == "m2") - ar <- ar * 1e+06 - } - row.names(ar) <- lev - class(ar) <- c("hrsize", "data.frame") - attr(ar, "units") <- unout - if (plotit) - plot(ar) - return(ar) -} +#This is taken from gpclib package + + +#' Hidden function to calculate Minimum Convex Polygon +#' +#' +#' +#' @param xy +#' @param id +#' @param percent +#' @param unin +#' @param unout +#' @param plotit +#' @author adehabitat +#' @export mcp.area +mcp.area <- function(xy, id, percent = seq(20, 100, by = 5), + unin = c("m","km"), unout = c("ha", "km2", "m2"), plotit = TRUE){ + unin <- match.arg(unin) + unout <- match.arg(unout) + if (length(id) != nrow(xy)) + stop("xy and id should be of the same length") + xy <- xy[!is.na(xy[, 1]), ] + xy <- xy[!is.na(xy[, 2]), ] + id <- id[!is.na(xy[, 1])] + id <- id[!is.na(xy[, 2])] + lev <- percent + res <- list() + ar <- matrix(0, nrow = length(lev), ncol = nlevels(factor(id))) + lixy <- split(xy, id) + le <- names(lixy) + for (i in 1:length(lev)) { + ar[i, ] <- unlist(lapply(lixy, function(z) { + res <- mcp(z, rep(1, nrow(z)), percent = lev[i]) + class(res) <- "data.frame" + return(area.poly(as(res[, 2:3], "gpc.poly"))) + })) + } + ar <- as.data.frame(ar) + names(ar) <- le + if (unin == "m") { + if (unout == "ha") + ar <- ar/10000 + if (unout == "km2") + ar <- ar/1e+06 + } + if (unin == "km") { + if (unout == "ha") + ar <- ar * 100 + if (unout == "m2") + ar <- ar * 1e+06 + } + row.names(ar) <- lev + class(ar) <- c("hrsize", "data.frame") + attr(ar, "units") <- unout + if (plotit) + plot(ar) + return(ar) +} diff --git a/vmstools/R/mergeEflalo2Pings.r 
b/vmstools/R/mergeEflalo2Pings.r index f1c2f96..6e0ca7e 100644 --- a/vmstools/R/mergeEflalo2Pings.r +++ b/vmstools/R/mergeEflalo2Pings.r @@ -1,1079 +1,1299 @@ - -#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# -#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# -#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# -#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# -# A 'R' ROUTINE FOR THE COUPLING OF VMS AND LOGBOOKS -# WP4 - Lot2 EU tender VMS/LOGBOOKS COUPLING -# author: Francois Bastardie (DTU- Aqua; fba@aqua.dtu.dk) -# January 2010 -#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# -#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# -#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# -#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# - - - - - - -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!MERGE LOGBOOKS WITH VMS PER 
VESSEL!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## - - -#!!!!!!!!!!!!!!!!!!!!!# -#!!!!!!!!!!!!!!!!!!!!!# -mergeEflalo2Pings <- - function(eflalo, tacsat, vessels=unique(eflalo$VE_REF), general=list(output.path=file.path("C:"), - visual.check=TRUE, detectFishing=FALSE, speed="segment", what.speed="calculated", conserve.all=TRUE, - ), ...){ - - lstargs <- as.list( sys.call() ) # equivalent to lstargs <- list(...) but suppress the r cmd build warning? 
- - # create required folders for outputs - cat("if it still doesn't exist, 'results' folder is created in ",general$output.path,"\n") - dir.create(general$output.path, showWarnings = TRUE, recursive = TRUE, mode = "0777") - - - #!!!!!!!!!!!!!!!!!!!!!# - #!!!!!!!!!!!!!!!!!!!!!# - # utils-- - collapse.all.columns <- function (obj, columns= seq(ncol(obj)) ){ - eval(parse(text=paste('paste(obj[,', paste(columns,collapse='] ,"#", obj[,'), '],sep=\'\')', sep=''))) } - uncollapse.column <- function(obj, column="coll"){ - dd<- strsplit(as.character(obj[,column]),"#") ; nco <- length(dd[[1]]) ; dd<- unlist(dd) - res <- eval(parse(text=paste('data.frame(',paste('dd[seq(',1:nco,',nrow(obj)*nco,by=nco)]', collapse=','),')'))) - colnames(res) <- paste("col",1:nco,sep='') - return(res) - } - - #utils-- - # FUNCTION TO CREATE A SPATIAL GRID - # 'xx' have a 'SI_LATI' and a 'SI_LONG' columns - assignPointsToSpatialGrid <- function(xx){ - - xx <- xx[,!colnames(xx) %in% c("icessquare","icessquare.vms") ] # remove - xx <- cbind.data.frame(xx, icessquare= rep(0,nrow(xx))) - - - rlong <- range(anf(xx$SI_LONG),na.rm=TRUE) - vect.long <- signif(seq(floor(rlong[1]), ceiling(rlong[2]), by=1),4) # long (x) - label.long <- rep(paste(rep(LETTERS,each=10),0:9,sep=""),each=1) - names(label.long) <- signif(seq(-50, 209, by=1),4) # long (x) - label.long <- label.long[!is.na(names(label.long))] # => correspondance long (-50 to 209) / sq letter (A0 to Z9) - label.long <- label.long[as.character(vect.long)] - rlat <- range(anf(xx$SI_LATI), na.rm=TRUE) - vect.lat <- signif(seq(floor(rlat[1]), ceiling(rlat[2]),by=0.5),4) # lat (y) - label.lat <- rep(paste(seq(1,75,1)),each=1) - names(label.lat) <- paste(signif(seq(36,73, by=0.5),4)) - label.lat <- label.lat[!is.na(names(label.lat))] # => correspondance lat (36 to 73) / sq number (1 to 75) - label.lat <- label.lat[as.character(vect.lat)] - vect.label <- paste(rep(label.lat,each=length(label.long)),"",label.long,sep="") - xx[,"SI_RECT"] <- 
paste(label.lat [findInterval(anf(xx[,"SI_LATI"]), vect.lat)] , label.long [findInterval(anf(xx[,"SI_LONG"]), vect.long)], sep="") - - return(xx) - } - - - #!!!!!!!!!!!!!!!!!!!!!# - #utils-- - # for managing NA on logbook side - # (from vms trip.sq without corresponding logbook trip.sq e.g. because no declaration in sq because only steaming time inside) - # we need to inform back the specificity of the vessel from logbook using info from the same trip i.e. vesselid+FT_REF - retrieveOnBkSide <- function(merged, type.data){ - idx <- which(merged$LE_MET_level6=="NA") - merged.NA <- merged[idx,] # input (only the trip.sq with NA for the logbook part) - - for (td in type.data){ - map <- tapply(merged[, td ], paste(merged$VE_REF, merged$FT_REF), - function(i) {ss<- unique(as.character(i)) ; ss[ss!="NA"][1]}) - merged.NA[, td ] <- factor(paste(merged.NA$VE_REF,merged.NA$FT_REF)) - levels(merged.NA[, td ]) <- map[levels(merged.NA[, td ])] - } - if(nrow(merged.NA)>0) merged.NA$flag <- 4 # flag on meth - merged[idx,] <- merged.NA # output - return(merged) - } - - #!#!##!#!##!#!##!#!##!#!##!#!# - #!#!##!#!##!#!##!#!##!#!##!#!# - #!#!##!#!##!#!##!#!##!#!##!#!# - #!#!##!#!##!#!##!#!##!#!##!#!# - #!#!##!#!##!#!##!#!##!#!##!#!# - all.vesselid <- as.character(unique(eflalo[anf(eflalo$VE_LEN)>=0,]$VE_REF)) - all.vesselid <- all.vesselid[!is.na(all.vesselid)] # e.g. when VE_LEN at NA exists - if(length(vessels)!=0) all.vesselid <- vessels - # => IF ARG INFORMED, THEN KEEP ONLY ONE OR SEVERAL VESSELS AS NEEDED.... 
- - for(a.vesselid in all.vesselid){ # PER VESSEL - cat(paste(a.vesselid,"\n", sep="" )) - - #---------- - #---------- - #---------- - #---------- - #---------- - #---------- - # LOGBOOK INPUT - logbk.this.vessel <- eflalo[eflalo$VE_REF %in% a.vesselid,] - logbk.this.vessel$LE_RECT <- factor(logbk.this.vessel$LE_RECT) - logbk.this.vessel$VE_REF <- factor(logbk.this.vessel$VE_REF) - logbk.this.vessel$VE_FLT <- factor(logbk.this.vessel$VE_FLT) - - - # automatic detection of a.year - general$a.year <- format(strptime( paste(logbk.this.vessel$FT_DDAT[1]) , tz='GMT', "%e/%m/%Y" ), "%Y") - - # departure time - logbk.this.vessel$LE_DTIME <- as.POSIXct( paste(logbk.this.vessel$FT_DDAT, logbk.this.vessel$FT_DTIME) , - tz='GMT', "%e/%m/%Y %H:%M" ) - # arrival time - logbk.this.vessel$LE_LTIME <- as.POSIXct( paste(logbk.this.vessel$FT_LDAT, logbk.this.vessel$FT_LTIME) , - tz='GMT', "%e/%m/%Y %H:%M" ) - # catch.date - logbk.this.vessel$LE_CTIME <- as.POSIXct( paste(logbk.this.vessel$LE_CDAT) , tz='GMT', "%e/%m/%Y" ) - - # mid time bk trips - LE_MIDTIME <- rep(NA, nrow(logbk.this.vessel)) - dep <- logbk.this.vessel$LE_DTIME +10 # we artificially add +10min because bug in R if mid-time is 00:00:00 - arr <- logbk.this.vessel$LE_LTIME +1 - for(r in 1:length(dep)){ - LE_MIDTIME[r] <- as.character(seq(from=dep[r], to=arr[r], length.out = 3)[2]) - } - logbk.this.vessel$LE_MIDTIME <- LE_MIDTIME - - if(!"FT_REF" %in% colnames(logbk.this.vessel) ) { - logbk.this.vessel$FT_REF <- factor(LE_MIDTIME) # init - levels(logbk.this.vessel$FT_REF) <- 1:length(logbk.this.vessel$FT_REF) # assign a FT_REF code - } # only if FT_REF is actually not already informed - - - - - #=> LOGBOOK (EFLALO) INPUT REQUIRES AT LEAST, - # 'VE_REF', FT_DDAT, FT_DTIME, FT_LDAT, FT_LTIME, FT_CDAT, - # 'LE_SP_KG' (etc.), 'LE_RECT', 'VE_FLT' AND 'LE_MET_level6', 'LE_GEAR' COLUMNS - # - - #---------- - #---------- - #---------- - #---------- - #---------- - #---------- - # VMS INPUT: load traj with 'at sea' pings 
SI_STATE informed - # ABSOLUTELY REQUIRED: c("VE_REF","SI_LATI","SI_LONG", "SI_DATE", "SI_TIME", "SI_FT", "SI_HARB", "SI_STATE") - - - if(a.vesselid %in% unique(tacsat$VE_REF)){ - - tacsat.this.vessel <- tacsat[tacsat$VE_REF == a.vesselid,] # subset for this vessel - tacsat.this.vessel$VE_REF <- factor(tacsat.this.vessel$VE_REF) - - - # if does not exist, add SI_DATIM for handling the time in R - tacsat.this.vessel$SI_TIME <- as.character(tacsat.this.vessel$SI_TIME) - tacsat.this.vessel[tacsat.this.vessel$SI_TIME=="24:00", "SI_TIME"] <- "00:00" # debug - tacsat.this.vessel[tacsat.this.vessel$SI_DATE=="29/02/1801", "SI_DATE"] <- "01/03/1801" # debug - tacsat.this.vessel$SI_DATIM <- as.POSIXct( paste(tacsat.this.vessel$SI_DATE, tacsat.this.vessel$SI_TIME) , - tz='GMT', "%d/%m/%Y %H:%M" ) - tacsat.this.vessel <- tacsat.this.vessel[!is.na(tacsat.this.vessel$SI_DATIM),] # debug e.g. when 29/02/1801 - - - # keep only the essential - vms.this.vessel <- tacsat.this.vessel [, c("VE_REF","SI_LATI","SI_LONG", - "SI_DATIM","SI_FT", "SI_SP", "SI_HE", "SI_HARB", "SI_STATE")] - rm(tacsat.this.vessel); gc(reset=TRUE) - vms.this.vessel$VE_REF <- factor(vms.this.vessel$VE_REF) - - vms.this.vessel$idx <- 1:nrow(vms.this.vessel) # label for each ping - - - - - # filter if vessel with a bad vms - to.remove.because.deficient.vms <- any(is.na(vms.this.vessel$SI_FT)) - to.remove.because.not.enough.vms.trips <- length(unique(vms.this.vessel$SI_FT))< 2 # nb vms trips < 2 - if(length(unique(vms.this.vessel$SI_FT))<2) warning('need more than 1 trip in SI_FT') - - # filter if vessel with a bad logbook - to.remove.because.pble.lgbk <- length(unique(logbk.this.vessel$FT_REF))< 2 # nb logbk trips < 2 - - # then... 
- a.flag <- to.remove.because.deficient.vms || to.remove.because.not.enough.vms.trips || to.remove.because.pble.lgbk - - ## remove FT_REF and SI_MIDTIME if it exists - vms.this.vessel <- vms.this.vessel[, !colnames(vms.this.vessel) %in% c("FT_REF", "SI_MIDTIME")] - - - - if(a.flag==FALSE) { # i.e. vms-equipped - - if(all(is.na(vms.this.vessel$SI_STATE)) && general$detectFishing==FALSE) - stop('the SI_STATE column has to be informed before making the merging') - if(all(is.na(logbk.this.vessel$VE_FLT))) - stop('the VE_FLT column has to be informed before making the merging') - - # alias - .logbk <- logbk.this.vessel - .vms <- vms.this.vessel - - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - #! DO THE LINK - APPROACH 1 #!#!!#!#!#!#!#!#!#!# - #!#!#!#!#!#!#!#!!#!#!#!#!#!#!#!!#!#!#!#!#!#!#!#!# - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - NIELS <- FALSE - if(NIELS){ - eftim <- .logbk[which(duplicated(.logbk$FT_REF)==FALSE),c("LE_DTIME","LE_LTIME","FT_REF")] - dtime <- eftim[,1] - ltime <- eftim[,2] - stime <- .vms$SI_DATIM - tripn <- eftim[,3] - - - - smdtime <- t(outer(stime,dtime,"-")) - gtltime <- outer(ltime,stime,"-") - - #-Find first point where tacsat time is greater or equal to departure time and smaller than arrival time - st <- apply(smdtime,1,function(x){which(x>=0)[1]}) - en <- apply(gtltime,1,function(x){rev(which(x>=0))[1]}) - - #-Make sure that values are within the interval of departure and arrival time - subse <- which(is.na(st <= en) == FALSE & (st <= en) == TRUE) - - st <- st[subse] - en <- en[subse] - - #-Assign Tacsat data with FT_REF from Eflalo2 dataset where they link - - if(length(st)!=1){ - - idx <- unlist(mapply(seq,st,en,SIMPLIFY=FALSE)) - reps <- unlist(lapply(mapply(seq,st,en,SIMPLIFY=FALSE),length)) - .vms$FT_REF <- 0 - .vms$FT_REF[idx] <- rep(tripn[subse],reps) - } - if(length(st)==1){ - .vms$FT_REF <- 0 - .vms$FT_REF[seq(st,en)] <- rep(tripn[subse],length(seq(st,en))) - } - if(length(st)==0){ - - .vms$FT_REF <- 0 - } - - - } 
# end NIELS - - FRANCOIS <- TRUE - if(FRANCOIS){ - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - #! DO THE LINK - APPROACH 2 #!#!!#!#!#!#!#!#!#!# - #!#!#!#!#!#!#!#!!#!#!#!#!#!#!#!!#!#!#!#!#!#!#!#!# - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - - #!!!!!!!!!!!!!!!!!!# - #!!!!!!!!!!!!!!!!!!# - # -If IT DOES NOT EXIST YET-, - # FIND THE MID-TIME OF VMS TRIPS - if(any(colnames(.vms)%in%"SI_DATIM")){ - if(!any(colnames(.vms)%in%"SI_DTIME")){ - # find and add the first point of each trip - .vms$start.trip <- c(1,diff(.vms[,"SI_FT"])) - .vms$end.trip <- c(diff(.vms[,"SI_FT"]), .vms[nrow(.vms),"SI_FT"]) - .vms[.vms$start.trip>0, "start.trip"] <- .vms[.vms$start.trip>0, "SI_FT"] - .vms[.vms$end.trip>0, "end.trip"] <- .vms[.vms$end.trip>0, "SI_FT"] - - tmp <- .vms[.vms$start.trip>0,] - tmp <- tmp[,c("VE_REF","SI_DATIM","SI_FT")] - tmp2 <- .vms[.vms$end.trip>0,] - tmp2 <- tmp2[,c("VE_REF","SI_DATIM","SI_FT")] - .vms <- .vms[,!colnames(.vms) %in% c("start.trip", "end.trip")] # remove tool columns - table.midtime <- merge(tmp, tmp2, by.x="SI_FT", by.y="SI_FT") - table.midtime <- table.midtime[, c("SI_FT","VE_REF.x","SI_DATIM.x","SI_DATIM.y") ] - colnames(table.midtime) <- c("SI_FT","VE_REF","SI_DTIME","SI_ATIME") - } else{ - table.midtime <- .vms[, c("SI_FT","VE_REF","SI_DTIME","SI_ATIME") ] - table.midtime <- table.midtime[!duplicated(data.frame(table.midtime$SI_FT, table.midtime$VE_REF)),] - } - } else{stop("no 'SI_DATIM' found in vms")} - SI_MIDTIME <- rep(0, nrow(table.midtime)) - - for(r in 1: nrow(table.midtime)){ - SI_MIDTIME[r] <- as.character(seq(from=table.midtime$SI_DTIME[r], to=table.midtime$SI_ATIME[r], length.out = 3)[2]) - - } - table.midtime$SI_MIDTIME <- SI_MIDTIME - if(!any(colnames(.vms)%in%"SI_MIDTIME")){ # here we are... 
- .vms <- merge(.vms, table.midtime[,c("SI_FT","SI_MIDTIME")], by.x="SI_FT", by.y="SI_FT") - } - - - - #!!!!!!!!!!!!!!!!!!# - #!!!!!!!!!!!!!!!!!!# - # ASSIGN A 'BK.TRIPNUM' FROM LOGBOOK TO EACH VMS TRIP - trunk <-1 # trunk give the part of the year to be plotted (1 to 5) - # visual check - if(general$visual.check){ - windows(width=8, height=4) - ltrunk <- (nrow(table.midtime)/5) - idxtrunk <- (trunk+(trunk-1)*ltrunk):(trunk*ltrunk) - # plot(table.midtime$SI_DTIME[idxtrunk],rep(1,length(table.midtime$SI_DTIME[idxtrunk])), - plot(table.midtime$SI_DTIME, rep(1,length(table.midtime$SI_DTIME)), - ylim=c(0,0.52), type="n", ylab="", axes=FALSE) - r <- as.POSIXct(round(range(table.midtime$SI_DTIME), "days")) - axis.POSIXct(1, at=seq(r[1], r[2], by="month"), format="%e%b%y:%H:%M") - axis(2, at=c(0.5,0.1),labels=c("VMS","LOGBOOK")) - - for(i in 1:nrow(table.midtime)) { - segments(as.POSIXct(table.midtime$SI_DTIME[i], tz='GMT'), 0.5, as.POSIXct(table.midtime$SI_ATIME[i], tz='GMT'), 0.5, col=1) - points(as.POSIXct(table.midtime$SI_MIDTIME[i], tz='GMT'), 0.5, col=1) - text(as.POSIXct(table.midtime$SI_MIDTIME[i], tz='GMT'), 0.52, table.midtime$SI_FT[i], cex=0.5, col=1) - - } - - tmp <- .logbk[, c("LE_DTIME","LE_LTIME", "LE_MIDTIME", "FT_REF")] - tmp <- tmp[!duplicated(tmp$LE_MIDTIME), ] - for(i in 1:nrow(tmp)){ - segments(as.POSIXct(tmp$LE_DTIME[i], tz='GMT'), 0.1, as.POSIXct(tmp$LE_LTIME[i], tz='GMT'), 0.1, col=1) - points(as.POSIXct(tmp$LE_MIDTIME[i], tz='GMT'), 0.1, col=1) - text(as.POSIXct(tmp$LE_MIDTIME[i], tz='GMT'), 0.0785, tmp$FT_REF[i], cex=0.5, col=1) - } - } - - - # THE CORE CODE: compare bk$LE_MIDTIME and vms$SI_MIDTIME - # find the nearest bk$LE_MIDTIME for each vms$SI_MIDTIME - # and then change levels - # (so, for each mid.time in vms, a FT_REF will be find) - # (so, no lines in vms without a FT_REF from bk...) 
- fa1 <- levels(factor(.vms$SI_MIDTIME)) - new.levels <- fa1 - fa2 <- levels(factor(.logbk$LE_MIDTIME)) - for(i in 1:length(fa1)) { # for each level in vms - tmp <- abs(as.numeric( as.POSIXct(fa2, tz='GMT') - as.POSIXct(fa1, tz='GMT')[i] )) - if(all(is.na(tmp))) tmp <- abs(as.numeric( as.Date(fa2) - as.Date(fa1)[i] )) # debug the R bug in case of mid-time at 00:00 hour - new.levels[i] <- fa2 [which(tmp == min(tmp, na.rm=TRUE) )] # find the nearest level in logbook - } - .vms$SI_MIDTIME <- factor(as.character(.vms$SI_MIDTIME)) - sauv <- .vms$SI_MIDTIME - levels(.vms$SI_MIDTIME) <- new.levels # and change mid.time in vms to force the merging - - # finally, replace levels by the FT_REF - tmp <- .logbk[.logbk$LE_MIDTIME %in% .vms$SI_MIDTIME , c("FT_REF","LE_MIDTIME")] - tmp2 <- tmp[!duplicated(tmp$FT_REF),] - idx <- match( levels(.vms$SI_MIDTIME), tmp2$LE_MIDTIME ) - .vms$FT_REF <- .vms$SI_MIDTIME # init - levels(.vms$FT_REF) <- as.character(tmp2$FT_REF ) [idx] - - - if(general$visual.check){ - for(i in 1: nrow(.vms)) { - arrows(as.POSIXct( sauv[i]), 0.5 ,as.POSIXct( .vms$SI_MIDTIME[i], tz='GMT'),0.1, length=0.1) - } - } - - if(general$visual.check){ - ve <- as.character(.logbk$VE_REF[1]) - savePlot(filename = file.path(general$output.path, - paste("assign_eflalo_tripnum_to_vms_",ve,"_",general$a.year,".jpeg",sep="")),type ="jpeg") - dev.off() - } - - - ## ADD A WARNING IN CASE OF LONG (UNREALISTIC) TRIPS ## - diff.date <- table.midtime$SI_ATIME - table.midtime$SI_DTIME # if at least one trip >30 days - if(attributes(diff.date)$units=="secs") idx <- which((((diff.date)/3600)/24) >30) - if(attributes(diff.date)$units=="hours") idx <- which((((diff.date)/1)/24) >30) - attributes((table.midtime$SI_ATIME - table.midtime$SI_DTIME )) - if (length( idx) >0){ - cat(paste("at least one vms trip > 30 days detected! 
check harbours...", "\n", sep="")) - suspicious <- .vms[.vms$SI_FT %in% table.midtime$SI_FT[idx] ,] - tmp <- table(suspicious$SI_LATI) - lat.suspicious <- names(tmp[tmp>5]) - if(length(lat.suspicious)!=0) cat(paste("potential harbour likely near lat ",lat.suspicious,"\n",sep="")) - tmp <- table(suspicious$SI_LONG) - long.suspicious <- names(tmp[tmp>5]) - if(length(long.suspicious)!=0) cat(paste("potential harbour likely near long ",long.suspicious,"\n",sep="")) - } # if at least one trip >30 days - rm(table.midtime) ; gc(reset=TRUE) - - - - .logbk$LE_MIDTIME <- factor(.logbk$LE_MIDTIME) - .logbk$FT_REF <- factor(.logbk$FT_REF) - .vms$SI_MIDTIME <- factor(.vms$SI_MIDTIME) - .vms$FT_REF <- factor(.vms$FT_REF) - - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - #! ASSIGN A 'SI_FT' FROM VMS TRIP NUM TO #!#!#!# - #! LOGBOOK TRIPS WITH NO VMS CORRESPONDANCE #!#!# - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - dep.bk.not.in.vms <- unique( .logbk$FT_REF [ !( .logbk$FT_REF %in% .vms$FT_REF ) ] ) - if(length(dep.bk.not.in.vms)!=0){ - # bk tripnum from dep not in vms - idx <- .logbk$FT_REF %in% dep.bk.not.in.vms - bk <- .logbk[idx,] [order(.logbk[idx,]$LE_DTIME),] - if(!"SI_MIDTIME" %in% colnames(.vms)){ - vms <- .vms [order(.vms$SI_DTIME),] - SI_MIDTIME <- rep(NA, nrow(vms)) - for(r in 1: nrow(vms)){ - SI_MIDTIME[r] <- as.character(seq(from=vms$SI_DTIME[r], to=vms$SI_ATIME[r], length.out = 3)[2]) - } - vms$SI_MIDTIME <- SI_MIDTIME - } else{ vms <- .vms[order(.vms$SI_MIDTIME),]} - #1- compare bk$mid.time and vms$mid.time - # find the nearest vms$mid.time for each bk$mid.time - # and then change levels - # (so for each mid.time in bk, a tripnum will be find) - # (so no lines in bk without a tripnum...) 
- fa1 <- levels(factor(bk$LE_MIDTIME)) - new.levels <- fa1 - fa2 <- levels(factor(vms$SI_MIDTIME)) - for(i in 1:length(fa1)) { # for each level in logbk - tmp <- abs(as.numeric( as.POSIXct(fa2, tz='GMT') - as.POSIXct(fa1, tz='GMT')[i] )) - new.levels[i] <- fa2 [which(tmp == min(tmp, na.rm=TRUE) )] # find the nearest level in vms - } - bk$LE_MIDTIME <- factor(as.character(bk$LE_MIDTIME)) - levels(bk$LE_MIDTIME) <- new.levels # and change mid.time in logbk to force the merging - - # finally, replace levels by the tripnum - # (note: a same FT_REF in vms can have different mid.time - # due to the first merging of vms to logbk in the vms analysis) - tmp <- vms[vms$SI_MIDTIME %in% bk$LE_MIDTIME , c("FT_REF","SI_MIDTIME")] - tmp2 <- tmp[!duplicated(data.frame(tmp$FT_REF, tmp$SI_MIDTIME)),] - idx2 <- match(levels(bk$LE_MIDTIME), tmp2$SI_MIDTIME) - bk$FT_REF <- bk$LE_MIDTIME # init - levels(bk$FT_REF) <- as.character(tmp2$FT_REF) [idx2] - - # output - bk$LE_MIDTIME <- as.character(bk$LE_MIDTIME) - bk$FT_REF <- as.character(bk$FT_REF) - .logbk[idx,][order(.logbk[idx,]$LE_DTIME),] <- bk - } - - - } # end FRANCOIS - - - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - # ASSIGN A RECTANGLE TO EACH PING #!#!#!#!#!#!#!# - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - .vms <- assignPointsToSpatialGrid(xx=.vms) - - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - # COMPUTE EFFORT.MINS !#!#!#!#!#!#!#!#!#!#!# - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - # vms - .vms <- .vms[order(.vms$SI_DATIM),] - .vms$LE_EFF_VMS <- abs(c(0, as.numeric(.vms[-nrow(.vms),"SI_DATIM"] - - .vms[-1,"SI_DATIM"], units="mins"))) - start.trip <- c(1,diff(.vms[,"SI_FT"])) - .vms[start.trip!=0, "LE_EFF_VMS"] <- 0 # just correct for the trip change points - # logbook (start/end of trip)- caution: will be repeated per ping in the merged output - #SI_DATIM <- as.POSIXct(paste(.logbk$FT_DDAT,.logbk$FT_DTIME,sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") - #SI_DATIM2 <- 
as.POSIXct(paste(.logbk$FT_LDAT,.logbk$FT_LTIME,sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") - #.logbk$LE_EFF <- an(difftime(SI_DATIM2,SI_DATIM,units="mins")) - - - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - # ASSIGN FISHING/NON-FISHING (optional)!#!#!#!#!# - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - if(general$detectFishing && general$speed=="segment") { - ## add a gear form tacsat from the logbook info (after the first merging) - ## because the assignement of a state is gear-specific. - ## caution here: we assume only one gear used inside a trip... - # because note that we remove 'logevent' and keep only one duplicate of tripnum - .vms$LE_GEAR <- factor(.vms$FT_REF) # init - tmp <- .logbk[,c("LE_GEAR","FT_REF")] - tmp <- tmp[!duplicated(tmp$FT_REF),] #remove logevent and keep only one duplicate of tripnum - tmp <- tmp[tmp$FT_REF %in% unique(.vms$LE_GEAR),] - idx <- match(levels(.vms$LE_GEAR), as.character(tmp$FT_REF)) - dd <- as.character(tmp$LE_GEAR) [idx] - dd <- replace(dd, is.na(dd), "UKN") # unknown because not matched if Niels - levels(.vms$LE_GEAR) <- dd - - # then do the assignement of the state - # according to a segmented regression on the speed histogram - .vms <- segmentTacsatSpeed ( - tacsat=.vms, - vessels=a.vesselid, - force.lower.bound=0.5, # to do: create an arg in the parent function instead... - gears.to.force= c('GNS','GND','GNC','GNF','GTR','GTN','GEN','GN','SDN','SSC'), # to do: create an arg in the parent function instead... 
- general=list(a.year=general$a.year, - output.path=general$output.path, - what.speed=general$what.speed, - visual.check=TRUE - ) - ) - #=> (semi)automatic detection of the fishing peak - # (put here because the LE_GEAR need to be informed) - # alternatively, - if(FALSE){ - .vms$SI_STATE <- 0 - .vms <- segmentedTacsatSpeed(.vms[.vms$VE_REF==a.vesselid,], units="year", analyse.by="LE_GEAR", - speed="instantaneous", logfit=FALSE, CI=0.95) - .vms$SI_STATE <- .vms$SI_STATE+1 # back compatibility with mergeEflalo2Pings() i.e. 1: fishing, 2: steaming - # ...but what about the passive gears then? - } - - - .vms <- .vms[, !colnames(.vms) %in% "LE_GEAR"] # remove after use to avoid future conflict. - } - # some alternatives TO DO: - #if(general$detectFishing && general$speed=="lookuptable") - # .vms <- lookupSpeedTacsat (tacsat=.vms, vessels=a.vesselid) - #if(general$detectFishing && general$speed=="bayesian") - # .vms <- bayesianFiltering (tacsat=.vms, vessels=a.vesselid) - - - - rm(er); rm(xx) ; gc(reset=TRUE) - - - - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - # MERGING WITH VMS PER TRIP !!!!!!!!!!#!#!#!#!#!# - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - do.merging <- function(method="FT_REF", .logbk, .vms, general){ - - # IF BY PING------------- - # find total nb of FISHING ping per tripnum from vms # used for method 1 'FT_REF' - if(method=="FT_REF"){ - # .vms$count.fping.trip <- factor(.vms$FT_REF) # init - # count.fping.trip <- table(.vms[.vms$SI_STATE==1,]$FT_REF) - # # => COUNT nb of FISHING pings per FT_REF because each weight will be repeated by ping after merging - # levels(.vms$count.fping.trip) <- count.fping.trip[levels(.vms$count.fping.trip)] # mapping - # .vms[.vms$SI_STATE==2,]$count.fping.trip <- NA - - # => COUNT nb of FISHING pings per FT_REF because each weight will be repeated by ping after merging - if(any(.vms$SI_STATE==1)){ - .vms$count.fping.trip <- factor(.vms$FT_REF) # init - countp <- countPings(~VE_REF+FT_REF, 
.vms[.vms$SI_STATE=="1",]) - rownames(countp) <- countp$FT_REF - levels(.vms$count.fping.trip) <- countp[levels(.vms$count.fping.trip),"pings"] # mapping - if(any(.vms$SI_STATE %in% 2)) .vms[.vms$SI_STATE==2,]$count.fping.trip <- NA - } else{.vms$count.fping.trip <- NA} - - - # => COUNT nb of gears per FT_REF because each ping will be repeated by gear after merging - count.gr.trip <- tapply(.logbk$LE_GEAR, .logbk$FT_REF, function(x) length(unique(x))) - .logbk$count.gr.trip <- count.gr.trip[.logbk$FT_REF] # mapping - - } - - - # find total nb of FISHING ping per trip-icessquare from vms # used for method 2 'FT_REF_SQ' - if(method=="FT_REF_SQ"){ - # .vms$count.fping.trip.sq <- factor(.vms$FT_REF_SQ) # init - # count.fping.trip.sq <- table(.vms[.vms$SI_STATE==1,]$FT_REF_SQ) # COUNT nb of FISHING pings per FT_REF_SQ - # levels(.vms$count.fping.trip.sq) <- count.fping.trip.sq[levels(.vms$count.fping.trip.sq)] # mapping - # if(any('2' %in% unique(.vms$SI_STATE))) .vms[.vms$SI_STATE==2,]$count.fping.trip.sq <- NA - - if(any(.vms$SI_STATE==1)){ - .vms$count.fping.trip.sq <- factor(.vms$FT_REF_SQ) # init - countp <- countPings(~VE_REF+SI_RECT+FT_REF, .vms[.vms$SI_STATE=="1",]) - rownames(countp) <- interaction(countp$FT_REF,countp$SI_RECT) - levels(.vms$count.fping.trip.sq ) <- countp[levels(.vms$count.fping.trip.sq),"pings"] # mapping - if(any('2' %in% unique(.vms$SI_STATE))) .vms[.vms$SI_STATE==2,]$count.fping.trip.sq <- NA - } else{.vms$count.fping.trip.sq <- NA} - - - # => COUNT nb of gears per FT_REF_SQ because each ping will be repeated by gear after merging - count.gr.trip.sq <- tapply(.logbk$LE_GEAR, .logbk$FT_REF_SQ, function(x) length(unique(x))) - .logbk$count.gr.trip.sq <- count.gr.trip.sq[.logbk$FT_REF_SQ] # mapping - } - - - # find total nb of FISHING ping per trip-icessquare-day from vms # used for method 3 'FT_REF_SQ_DAY' - if(method=="FT_REF_SQ_DAY"){ -# .vms$count.fping.trip.sq.day <- factor(.vms$FT_REF_SQ_DAY) # init -# count.fping.trip.sq.day <- 
table(.vms[.vms$SI_STATE==1,]$FT_REF_SQ_DAY) # COUNT nb of FISHING pings per FT_REF_SQ_DAY -# levels(.vms$count.fping.trip.sq.day) <- count.fping.trip.sq.day[levels(.vms$count.fping.trip.sq.day)] # mapping -# if(any('2' %in% unique(.vms$SI_STATE))) .vms[.vms$SI_STATE==2,]$count.fping.trip.sq.day <- NA - - - if(any(.vms$SI_STATE==1)){ - .vms$count.fping.trip.sq.day <- factor(.vms$FT_REF_SQ_DAY) # init - countp <- countPings(~VE_REF+day+SI_RECT+FT_REF, .vms[.vms$SI_STATE=="1",]) - rownames(countp) <- interaction(countp$FT_REF, countp$SI_RECT, countp$SI_DAY) - levels(.vms$count.fping.trip.sq.day) <- countp[levels(.vms$count.fping.trip.sq.day),"pings"] # mapping - if(any('2' %in% unique(.vms$SI_STATE))) .vms[.vms$SI_STATE==2,]$count.fping.trip.sq.day <- NA - } else{.vms$count.fping.trip.sq.day <- NA} - - - # => COUNT nb of gears per FT_REF_SQ_DAY because each ping will be repeated by gear after merging - count.gr.trip.sq.day <- tapply(.logbk$LE_GEAR, .logbk$FT_REF_SQ_DAY, function(x) length(unique(x))) - .logbk$count.gr.trip.sq.day <- count.gr.trip.sq.day[.logbk$FT_REF_SQ_DAY] # mapping} - } - - - - # do the merging between .logbk and .vms according to - # meth1: 'FT_REF' OR meth2: 'FT_REF_SQ' OR meth3: 'FT_REF_SQ_DAY' - # need to use a trick to avoid "out of memory" doing the merge() - coln.idx1 <- which(!colnames(.logbk)%in%c("VE_REF", method)) - coln1 <- colnames(.logbk)[coln.idx1] - tmp1 <- data.frame(coll= collapse.all.columns (.logbk, columns= coln.idx1 ), - VE_REF=.logbk$VE_REF, a.method= .logbk[,method] ) #.logbk - coln.idx2 <- which(!colnames(.vms)%in%c("VE_REF", method)) - coln2 <- colnames(.vms)[coln.idx2] - tmp2 <- data.frame(coll2= collapse.all.columns (.vms, columns= coln.idx2 ), - VE_REF=.vms$VE_REF, a.method= .vms[,method] ) #.vms - tmp1[,"a.method"] <- factor(tmp1[,"a.method"] ) - tmp2[,"a.method"] <- factor(tmp2[,"a.method"] ) - - merged.this.vessel <- merge(tmp1, tmp2, all.x=TRUE, all.y=TRUE, suffixes = c(".bk",".vms")) - #=> so, with all.y = TRUE, 
the vms records without corresponding logbk records are kept and NA are produced on the logbook part - #=> so, with all.x = TRUE, the logbk records without corresponding vms records are kept and NA are produced on the vms part - merged.this.vessel$coll <- replace(as.character(merged.this.vessel$coll),is.na(merged.this.vessel$coll), paste(rep("NA",length(coln1)),collapse="#")) - merged.this.vessel$coll <- factor(merged.this.vessel$coll) - #=> adapt 'coll' to get a vector of NA (NA in case of 'in vms but not in logbook') - merged.this.vessel$coll2 <- replace(as.character(merged.this.vessel$coll2),is.na(merged.this.vessel$coll2), paste(rep("NA",length(coln2)),collapse="#")) - # adapt 'coll2' to get a vector of NA (NA in case of 'in logbook but not in vms') - merged.this.vessel$coll2 <- factor(merged.this.vessel$coll2) - colnames(merged.this.vessel)[colnames(merged.this.vessel)%in%"a.method"] <- method - - tmp3 <- uncollapse.column(merged.this.vessel, column="coll") # logbk - tmp4 <- uncollapse.column(merged.this.vessel, column="coll2") # vms - tmp5 <- cbind.data.frame(merged.this.vessel[,c("VE_REF", method)], tmp3, tmp4) - colnames(tmp5) <- c("VE_REF", method, coln1, coln2) - merged.this.vessel <- tmp5 - - # we can choose to correct to keep the land. weight: - # the loss in weight will come from the matching records having catches but - # without fishing pings (i.e. only steaming pings)! - if(is.null(general$conserve.all)) general$conserve.all <- FALSE - if(general$conserve.all){ - # do the conservation of landings anyway? - # detect possible weight landed while no feffort detected from vms - # find FT_REF with some NA - vv<- anf(unique(merged.this.vessel[merged.this.vessel$count.fping.trip=="NA","FT_REF"])) - # then, find FT_REF with at least one no NA - no.vv<- anf(unique(merged.this.vessel[merged.this.vessel$count.fping.trip!="NA","FT_REF"])) - tripnum.all.na.inside <- vv[!vv%in%no.vv] # trip num without at least one count.fping! 
- # so, deduce loss in weight - zz<- merged.this.vessel[merged.this.vessel$FT_REF %in% tripnum.all.na.inside,] - - if(method=="FT_REF"){ - # in this case, reallocate evenly between all pings (caution: including steaming pings) - merged.this.vessel[,"count.fping.trip"] <- anf(merged.this.vessel[,"count.fping.trip"]) - merged.this.vessel$FT_REF <- factor( merged.this.vessel$FT_REF) - nbpings.per.trip <- unlist(lapply(split(merged.this.vessel[merged.this.vessel$FT_REF %in% tripnum.all.na.inside,], - merged.this.vessel[merged.this.vessel$FT_REF %in% tripnum.all.na.inside,]$FT_REF),nrow)) - merged.this.vessel[merged.this.vessel$FT_REF %in% tripnum.all.na.inside, "count.fping.trip"] <- rep(nbpings.per.trip,nbpings.per.trip ) - merged.this.vessel[merged.this.vessel$FT_REF %in% tripnum.all.na.inside, "flag"] <- 5 - } - } # end conserve.all - - - - - # apply the catches re-distribution - # method 1, 2 and 3: per ping - # PER PING: - # ASSUMING EQUAL ALLOCATION BETWEEN FISHING PINGS AND GEARS USE INSIDE A SAME TRIP - nm <- names(merged.this.vessel) - idx.col.w <- grep('KG', nm) # index columns with species weight - idx.col.v <- grep('EURO', nm) # index columns with species value - idx.col <- c(idx.col.w, idx.col.v) - if(method=="FT_REF_SQ_DAY"){ - merged.this.vessel[,idx.col] <- (apply(merged.this.vessel[,idx.col],2,anf) / - anf(merged.this.vessel$count.fping.trip.sq.day)) / - anf(merged.this.vessel$count.gr.trip.sq.day) - } - if(method=="FT_REF_SQ"){ - merged.this.vessel[,idx.col] <- (apply(merged.this.vessel[,idx.col],2,anf) / - anf(merged.this.vessel$count.fping.trip.sq)) / - anf(merged.this.vessel$count.gr.trip.sq) - } - if(method=="FT_REF"){ - merged.this.vessel[,idx.col] <- (apply(merged.this.vessel[,idx.col],2,anf) / - anf(merged.this.vessel$count.fping.trip) ) / - anf(merged.this.vessel$count.gr.trip) - } - - - - - return(merged.this.vessel) - } - - - - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - # SET UP PRIMARY KEYS FOR MERGING!#!#!#!#!#!#!#!# - 
#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - .logbk$FT_REF_SQ_DAY <- factor(paste(.logbk$FT_REF, ".", .logbk$LE_RECT,".", an(format(.logbk$LE_CTIME, '%j')), sep='')) - .vms$FT_REF <- factor(.vms$FT_REF) - .vms$FT_REF_SQ <- factor(paste(.vms$FT_REF, ".", .vms$SI_RECT, sep='')) - .vms$FT_REF_SQ_DAY <- factor(paste(.vms$FT_REF, ".", .vms$SI_RECT,".", an(format(.vms$SI_DATIM, '%j')), sep='')) - - - - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - # AGGREGATE WEIGHT PER SPECIES !#!#!#!#!#!#!#!#!# - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - nm <- names(.logbk) - idx.col.w <- grep('KG', nm) # index columns with species weight - idx.col.v <- grep('EURO', nm) # index columns with species value - idx.col <- c(idx.col.w, idx.col.v) - - DT <- data.table(.logbk) # library data.table for fast grouping replacing aggregate() - # AGGREGATE WEIGHT (OR VALUE) PER SPECIES PER FT_REF_SQ_DAY (NOTE: SO, 'LE_SEQNUM' IS AGGREGATED HERE) - eq1 <- c.listquote(paste("sum(",nm[idx.col],",na.rm=TRUE)",sep="")) - .logbk <- DT[,eval(eq1),by=list(FT_REF_SQ_DAY,VE_REF,VE_FLT,VE_KW,LE_MET_level6,LE_GEAR)] - .logbk <- data.frame(.logbk) - colnames(.logbk) <- c("FT_REF_SQ_DAY","VE_REF","VE_FLT","VE_KW","LE_MET_level6","LE_GEAR",nm[idx.col]) - - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - # MERGING PROCEDURE CHOICE !#!#!#!#!#!#!#!#!#!#!# - #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# - - my.split <- function(obj,a.sep="\\.",idx=1) unlist(lapply(strsplit(obj, a.sep),function(x)x[idx])) - # reduce the level - .logbk$FT_REF_SQ <- factor(paste(my.split(as.character(.logbk$FT_REF_SQ_DAY),a.sep="\\.",idx=1), - my.split(as.character(.logbk$FT_REF_SQ_DAY),a.sep="\\.",idx=2),sep='.')) - # reduce the level - .logbk$FT_REF <- factor(my.split(as.character(.logbk$FT_REF_SQ),a.sep="\\.",idx=1)) - - # find common keys - tripnum.sq.day.in.vms.and.in.bk <- .vms$FT_REF_SQ_DAY [.vms$FT_REF_SQ_DAY %in% .logbk$FT_REF_SQ_DAY] - tripnum.sq.in.vms.and.in.bk <- .vms$FT_REF_SQ [.vms$FT_REF_SQ 
%in% .logbk$FT_REF_SQ] - .vms.in.bk <- .vms[ .vms$FT_REF_SQ_DAY %in% tripnum.sq.day.in.vms.and.in.bk,] - .vms.in.bk2 <- .vms[ !(.vms$FT_REF_SQ_DAY %in% tripnum.sq.day.in.vms.and.in.bk) & - .vms$FT_REF_SQ %in% tripnum.sq.in.vms.and.in.bk,] - in.bk.and.feffort.not.at.0 <- unique(.vms.in.bk[.vms.in.bk$SI_STATE==1,]$FT_REF_SQ_DAY) - in.bk2.and.feffort.not.at.0 <- unique(.vms.in.bk2[.vms.in.bk2$SI_STATE==1,]$FT_REF_SQ) - - # split .vms and .logbk in three blocks - # vms with good match => go to meth3 - .vms.for.meth3 <- .vms [.vms$FT_REF_SQ_DAY %in% in.bk.and.feffort.not.at.0, ] - # vms with intermediate match => go to meth2 - .vms.for.meth2 <- .vms [!(.vms$FT_REF_SQ_DAY %in% in.bk.and.feffort.not.at.0) & - (.vms$FT_REF_SQ %in% in.bk2.and.feffort.not.at.0), ] - # vms with bad match => go to meth1 - .vms.for.meth1 <- .vms [!(.vms$FT_REF_SQ_DAY %in% in.bk.and.feffort.not.at.0) & - !(.vms$FT_REF_SQ %in% in.bk2.and.feffort.not.at.0), ] - # logbk with good match => go to meth3 - .logbk.for.meth3 <- .logbk [.logbk$FT_REF_SQ_DAY %in% in.bk.and.feffort.not.at.0, ] - # logbk with intermediate match => go to meth2 - .logbk.for.meth2 <- .logbk [!(.logbk$FT_REF_SQ_DAY %in% in.bk.and.feffort.not.at.0) & - (.logbk$FT_REF_SQ %in% in.bk2.and.feffort.not.at.0), ] - # logbk with bad match => go to meth1 - .logbk.for.meth1 <- .logbk [!(.logbk$FT_REF_SQ_DAY %in% in.bk.and.feffort.not.at.0) & - !(.logbk$FT_REF_SQ %in% in.bk2.and.feffort.not.at.0), ] - - suppressWarnings(rm(merged1, merged2, merged3)) # clear - #!! 
METH1 !!# - if(nrow(.logbk.for.meth1)!=0 && nrow(.vms.for.meth1)!=0 ) { - # remove useless cols and aggregate according to the key 'FT_REF' - .logbk.for.meth1 <- .logbk.for.meth1[, !colnames(.logbk.for.meth1)%in% c("FT_REF_SQ_DAY","FT_REF_SQ")] - nm <- names(.logbk.for.meth1) - idx.col.w <- grep('KG', nm) # index columns with species weight - idx.col.v <- grep('EURO', nm) # index columns with species value - idx.col <- c(idx.col.w, idx.col.v) - # AGGREGATE WEIGHT (OR VALUE) PER SPECIES PER FT_REF - DT <- data.table(.logbk.for.meth1) # library data.table for fast grouping replacing aggregate() - eq1 <- c.listquote(paste("sum(", nm[idx.col],",na.rm=TRUE)",sep="")) - .logbk.for.meth1 <- DT[,eval(eq1),by=list(VE_REF,FT_REF,VE_FLT,VE_KW,LE_MET_level6,LE_GEAR)] - .logbk.for.meth1 <- data.frame(.logbk.for.meth1) - colnames(.logbk.for.meth1) <- c("VE_REF","FT_REF","VE_FLT","VE_KW","LE_MET_level6","LE_GEAR",nm[idx.col]) - # do.merging - merged1 <- do.merging(method="FT_REF", .logbk.for.meth1, .vms.for.meth1, general) - # add meth flag - if("flag" %in% names(merged1) && nrow(merged1[is.na(merged1[,"flag"]),])!=0){ - merged1[is.na(merged1[,"flag"]),"flag"] <- 1 # meth 1 - } else merged1$flag <- 1 - } - #!! 
METH2 !!# - if(nrow(.logbk.for.meth2)!=0 && nrow(.vms.for.meth2)!=0 ) { - # remove useless cols and aggregate according to the key 'FT_REF_SQ' - .logbk.for.meth2 <- .logbk.for.meth2[, !colnames(.logbk.for.meth2)%in% c("FT_REF_SQ_DAY","FT_REF")] - nm <- names(.logbk.for.meth2) - idx.col.w <- grep('KG', nm) # index columns with species weight - idx.col.v <- grep('EURO', nm) # index columns with species value - idx.col <- c(idx.col.w, idx.col.v) - # AGGREGATE WEIGHT (OR VALUE) PER SPECIES PER FT_REF_SQ - DT <- data.table( .logbk.for.meth2) # library data.table for fast grouping replacing aggregate() - eq2 <- c.listquote(paste("sum(",nm[idx.col],",na.rm=TRUE)",sep="")) - .logbk.for.meth2 <- DT[,eval(eq2),by=list(VE_REF,FT_REF_SQ,VE_FLT,VE_KW,LE_MET_level6,LE_GEAR)] - .logbk.for.meth2 <- data.frame(.logbk.for.meth2) - colnames(.logbk.for.meth2) <- c("VE_REF","FT_REF_SQ","VE_FLT","VE_KW","LE_MET_level6","LE_GEAR",nm[idx.col]) - # do.merging - merged2 <- do.merging(method="FT_REF_SQ", .logbk.for.meth2, .vms.for.meth2, general) - # add meth flag - merged2$flag <- 2 # meth 2 - } - #!! METH3 !!# - if(nrow(.logbk.for.meth3)!=0 && nrow(.vms.for.meth3)!=0 ) { - # do.merging - merged3 <- do.merging(method="FT_REF_SQ_DAY", .logbk.for.meth3, .vms.for.meth3, general) - # add meth flag - merged3$flag <- 3 # meth 3 - } - - # bind the three blocks - merged <- NULL ; colnm <- NULL - for(i in 1: 3){ - a.table <- try(get(paste('merged',i,sep='')), silent=TRUE) - if(class(a.table)!="try-error"){ - a.table <- a.table[, !colnames(a.table) %in% - c("count.fping.trip.sq.day","count.fping.trip.sq","count.fping.trip", - "tot.fish.effort.trip","tot.fish.effort.trip.sq", - "count.gr.trip", "count.gr.trip.sq", "count.gr.trip.sq.day", - "FT_REF_SQ", "FT_REF_SQ_DAY")] # remove tool columns - if(i==1) colnm <- colnames(a.table) ; if(is.null(colnm)) colnm <- colnames(a.table) - merged <- rbind.data.frame (merged, a.table[, colnm]) - } - } - - # if still 'not merging' part, retrieve on NA side i.e. 
occurs when pings in vms but not in bk - merged <- retrieveOnBkSide(merged, type.data=c( "VE_FLT","VE_KW","LE_GEAR", "LE_MET_level6")) # i.e. when metier=='NA' - - - # clean up - rm(a.table, merged1, merged2, merged3, merged.this.vessel,.vms, .logbk, logbk.this.vessel, vms.this.vessel) - gc(reset=TRUE) - - # restore tacsat names "%e/%m/%Y %H:%M" - idx <- merged$SI_DATIM!='NA' # NA is possible when bk not in vms because bk.tripnum vms may belong to another block than block1 - merged$SI_DATIM <- as.character(merged$SI_DATIM) - merged$SI_DATE <- NA - merged[idx,"SI_DATE"] <- paste(substr(merged[idx,]$SI_DATIM ,9,10),"/", - substr(merged[idx,]$SI_DATIM , 6,7), "/", substr(merged[idx,]$SI_DATIM ,1,4), sep='') - merged$SI_TIME <- NA - merged[idx,"SI_TIME"] <- paste(substr(merged[idx,]$SI_DATIM , 12,13),":", - substr(merged[idx,]$SI_DATIM , 15,16), sep='') - - # last calculation - merged$KW_HOURS <- anf(merged$VE_KW) * anf(merged$LE_EFF_VMS) /60 - - # order chronologically - merged <- orderBy(~SI_DATIM, merged) - - # last clean up - merged <- merged[, !colnames(merged) %in% c('idx', 'icessquare', "SI_DATIM", "SI_MIDTIME")] - - # save------------ - save("merged", file=file.path(general$output.path, - paste("merged_", a.vesselid,"_",general$a.year,".RData", sep=''))) - cat(paste("save 'merged'...OK\n\n",sep="")) - - - }else{ # end 'a.flag' - cat(paste("failure for",a.vesselid,"(probably not vms-equipped)\n")) - # because no vms for this vessel... - # TO DO: the logbk way - #... - } - }else{ # end try-error - cat(paste("failure for",a.vesselid,"(probably not vms-equipped)\n")) - # because no vms for this vessel... - # TO DO: the logbk way - #... 
- } - - - - - } # end a.vesselid - - - - - - -return() -} - - - - - - - ##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## - ##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## - ##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## - ##!!!!!MAIN!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## - ##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## - ##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## - ##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## - if(FALSE) { - - - #\dontrun{ - data(eflalo) - data(tacsat) - data(euharbours); euharbours <- harbours - - # format - eflalo <- formatEflalo(eflalo) - tacsat <- formatTacsat(tacsat) - - # order tacsat chronologically with library(doBy) - tacsat <- sortTacsat(tacsat) - - # test each ping if in harbour or not - tacsat$SI_HARB <- NA - euharbours$Description <- euharbours$harbour - tacsat$SI_HARB <- pointInHarbour(lon=anf(tacsat$SI_LONG), - lat=anf(tacsat$SI_LATI), - harbours=euharbours, - rowSize=30, returnNames=TRUE) - inHarb <- tacsat$SI_HARB - inHarb <- replace(inHarb, !is.na(inHarb), 1) - inHarb <- replace(inHarb, is.na(inHarb), 0) - inHarb <- as.numeric(inHarb) - - # assign a trip identifier - tacsat$SI_FT <- 1 # init - idx <- which(inHarb==0) - tacsat[idx,"SI_FT"] <- cumsum(inHarb) [idx] # add a SI_FT index - - # keep 'out of harbour' points only - # (but keep the departure point and the arrival point lying in the harbour) - startTrip <- c(diff(tacsat[,"SI_FT"]), 0) - endTrip <- c(0, diff(tacsat[,"SI_FT"])) - tacsat[which(startTrip>0),"SI_FT"] <- tacsat[which(startTrip>0)+1,"SI_FT"] - tacsat[which(endTrip<0),"SI_FT"] <- tacsat[which(endTrip<0)-1,"SI_FT"] - tacsat <- tacsat[which(inHarb==0 | startTrip>0 | endTrip<0),] - - - # assign a state to each ping (here, useless if detectFishing at TRUE) - tacsat$SI_STATE <- 2 # init (1: fishing; 2: steaming) - # fake speed rule for fishing state - tacsat$SI_STATE [(tacsat$SI_SP>4 & tacsat$SI_SP<8)] <-1 - - - # reduce the size of the eflalo data by merging species - # (assuming that the other species is coded MZZ), 
threshold in euros. - eflalo2 <- poolEflaloSpecies (eflalo, threshold=1e6, code="MZZ") - - # debug if eflalo has not been cleaned earlier - eflalo <- eflalo[!eflalo$VE_REF=="NA" &!is.na(eflalo$VE_REF),] - - # an informed VE_FLT is also required - if(all(is.na(eflalo$VE_FLT))) eflalo$VE_FLT <- "fleet1" - - # possible mis-naming mistakes - if(!match('LE_MET_level6',colnames(eflalo))>0){ - eflalo$LE_MET_level6 <- eflalo$LE_MET - } - - # debug - eflalo <- eflalo[eflalo$LE_MET!="No_logbook6",] - - - # TEST FOR A GIVEN SET OF VESSELS - # (if detect.fishing is true then do also detection of fishing activity - # e.g. if speed='segment' the segmentTacsatSpeed() automatic detection of fishing states - # that will overwrite the existing SI_STATE) - mergeEflalo2Pings (eflalo=eflalo, tacsat=tacsat, vessels=c("738", "804"), - general=list(output.path=file.path("C:","output"), - visual.check=TRUE, detectFishing=TRUE, speed="segment", - what.speed="calculated")) - # ...OR APPLY FOR ALL VESSELS IN eflalo - mergeEflalo2Pings (eflalo=eflalo, tacsat=tacsat, - general=list(output.path=file.path("C:","output"), - visual.check=TRUE, detectFishing=TRUE, speed="segment", - what.speed="calculated")) - gc(reset=TRUE) - - # load the merged output table for one vessel - load(file.path("C:","output","merged_804_1800.RData")) - - # check the conservation of landings - sum(tapply(anf(merged$LE_KG_PLE), merged$flag, sum, na.rm=TRUE)) - sum(eflalo[eflalo$VE_REF=="804","LE_KG_PLE"], na.rm=TRUE) - - - # ...or bind all vessels (keeping only some given species here) - bindAllMergedTables (vessels=c("738", "804"), a.year = "1800", - species.to.keep=c("PLE","COD"), - folder = file.path("C:","output"), - all.in.one.table=TRUE) - - # ...and load the merged output table for all vessels - load(file.path("C:","output","all_merged__1800.RData")) - - # map landing of cod from all studied vessels - # ( with debugging if tacsat has not been cleaned earlier) - graphics.off() - df1<- all.merged[, 
c("SI_LATI","SI_LONG","LE_KG_COD")] - df1$SI_LONG <- anf(df1$SI_LONG) - df1$SI_LATI <- anf(df1$SI_LATI) - df1 <- df1[ !is.na(df1$SI_LATI),] - df1 <- df1[ !is.na(df1$SI_LONG),] - vmsGridCreate(df1,nameLon="SI_LONG", nameLat="SI_LATI", - nameVarToSum = "LE_KG_COD", cellsizeX =0.1, - cellsizeY =0.05, legendtitle = "COD landings (kg)") - - # but you need to remove steaming points before gridding! - df2<-df1[-which(is.na(df1$LE_KG_COD)),] - vmsGridCreate(df2,nameLon="SI_LONG",nameLat="SI_LATI", we = 3, ea = 6, so = 50, no = 54, - nameVarToSum = "LE_KG_COD",cellsizeX =0.1, - cellsizeY =0.05, legendtitle = "COD landings (kg)", plotPoints =TRUE, - breaks0=c(1,2,4,8,16,32,64,100000)) - - - - # CONVERT TO FISHFRAME FORMAT (might take some time running) - # (by default, this will keep all the species in the output table) - tmp <- bindAllMergedTables (vessels= unique(tacsat$VE_REF), - species.to.keep=character(), - folder = file.path("C:","output"), - all.in.one.table=FALSE) - - ff <- pings2Fishframe (general=list(output.path=file.path("C:","output"), - a.year=1800, a.country="NLD", degree=0.05 ) ) - - - # TO DO.... - # Use the interpolation routine to improve the location of the effort - #all.merged$SI_SP <- as.numeric(as.character( all.merged$SI_SP)) - #all.merged$SI_HE <- as.numeric(as.character( all.merged$SI_HE)) - #all.merged$SI_LONG <-as.numeric(as.character(all.merged$SI_LONG)) - #all.merged$SI_LATI <-as.numeric(as.character(all.merged$SI_LATI)) - #interpolations <- interpolateTacsat( all.merged [,c("VE_REF","SI_LATI","SI_LONG","SI_DATE","SI_TIME","SI_SP","SI_HE")] - # ,interval=120 - # ,margin=12 - # ,res=100 - # ,method="cHs" - # ,params=list(fm=0.5,distscale=20,sigline=0.2,st=c(2,6)) - # ,headingAdjustment=0 - # ) - #interpolationsED <- equalDistance(interpolations,res=10) - # make sure that the 'res' statement in the interpolateTacsat is significantly bigger - # than the 'res' statement in the equalDistance function. - - # then map again... 
- #vmsGridCreate(interpolationsED,nameLon="SI_LONG",nameLat="SI_LATI", - # cellsizeX =0.05, cellsizeY =0.05, legendtitle = "landings (kg)") - - - - #} - -} # end main \ No newline at end of file +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# +# A 'R' ROUTINE FOR THE COUPLING OF VMS AND LOGBOOKS +# WP4 - Lot2 EU tender VMS/LOGBOOKS COUPLING +# author: Francois Bastardie (DTU- Aqua; fba@aqua.dtu.dk) +# January 2010 +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!# + + + + + + +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## 
+##!!!!!!!!!!!!!!!!!!!!!!MERGE LOGBOOKS WITH VMS PER VESSEL!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## +##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## + + +#!!!!!!!!!!!!!!!!!!!!!# +#!!!!!!!!!!!!!!!!!!!!!# + + +#' Coupling of VMS and logbooks data including landings dispatching between +#' fishing pings. +#' +#' The first step of the merging routine consisted of assigning a common +#' identifier joining each VMS trip to a logbook trip by searching for the +#' nearest trip among all logbook trips based on the temporal mid-point of +#' trips. This circumvents problems where the start and end time of VMS and +#' logbook trips do not match exactly and ensure that each logbook trip will be +#' merged with a given VMS trip even if the reporting of the start and end date +#' by fishermen is uncertain. We observed in the data that this reporting date +#' may fluctuate with +/- 1 day around the VMS trip dates possibly because of +#' error in reporting. Additionally, the possible few remaining logbook trips +#' were also linked with the nearest VMS trips. This latter event may occur +#' from unknown failure in the VMS device while fishermen were still fishing. +#' In the processing, links were then forced to nearest VMS trips to prevent +#' loss of any reported landings. +#' +#' After this fist merging, the distinction of fishnig and not-fishing states +#' can be performed if the 'detectFishing' argument is at TRUE. 
The current +#' default method ('segment') will run an automatic detection of the fishing +#' peak given the gear type in use. After having detected the vessel-specific +#' speed lower and upper boundaries, one state (i.e. in SI_STATE) is assigned +#' to each ping. +#' +#' The second step of the process evaluated the consistency between both types +#' of data sources. Sequentially, a quality flag (i to iv) was issued for each +#' recorded position depending on the various degrees of matching, from finer +#' to coarser, if both data have in common: (i) the trip identifier, the same +#' area (ICES rectangle), and the catch date (ii) only the trip identifier and +#' the area, or (iii) only the trip identifier. (the flag iv occur when not +#' fishing points is remaining while some landings are still to be allocated in +#' these very few cases, all steaming points are actually converted to fishing +#' points). The initial datasets (both VMS and logbooks) were split into +#' sub-blocks according to this flag. Each pair of sub-blocks was then joined +#' separately and all merged sub-blocks were bound in one dataset afterwards. +#' +#' In a final step, declared landings (in weight and value) at the ICES +#' rectangle scale from logbooks were further allocated at a lower spatial and +#' time scale level with regard to the VMS information. This was done by +#' allocating a proportion of the landings to each detected fishing position +#' depending on the level of matching between VMS and logbook data as described +#' in step 2. In the case of full matching, landings allocated to a given +#' position were proportional to the number of detected fishing positions lying +#' inside each declared logbook area for a given date, assuming that the total +#' landings declared in this area at this date are divided equally among all of +#' these positions. 
In the particular case of the use of several gear types +#' within the same rectangle and also for the same catch date, an equal share +#' of landings between gear types was also assumed for this day. In case of +#' partial matching, the following procedure was used. First, when the match +#' was correct by area but not by catch date, the landings were equally +#' dispatched between all the fishing positions of this trip detected in this +#' particular ICES rectangle. Second, when the match failed on both catch date +#' and area, the landings concerned were equally allocated to all the detected +#' fishing positions of the trip for which no previous match has been detected. +#' Finally, residual landings (kept in the final output) occurred when the +#' match failed both for the catch date and area, and no fishing position +#' remained. +#' +#' Warning: However, one should take care that the generated maps related to +#' the origin of landings do not entirely reflect the spatial distributions of +#' the harvested populations but more accurately should be interpreted as the +#' conjunction between the stocks and the fishermen effort distribution. The +#' distribution of the landings on discrete positions should also be understood +#' with some cautions ( albeit the number of pings may be high enough to be +#' considered representative on a large part of the total exploited areas) and +#' possible allocation of landings on locations we know that a stock is not +#' present might potentially occur. +#' +#' in tacsat SI_STAT, fishing should be coded 1 and non-fishing should be coded +#' 2. The method is very sensitive to missing harbours. Some warning messages +#' are then provided to help the user to detect some repeated positions that +#' are likely to be missing harbours. In this last case you should revise the +#' harbour list before processing again. 
+#' +#' @param eflalo data.frame, eflalo format +#' @param tacsat data.frame, tacsat format +#' @param general list, general settings to go through all sub-functions e.g. +#' detectFishing +#' @param vessels (optional) name of the vessel(s) to process e.g. those in +#' tacsat +#' @return Nothing is returned but a merged data.frame per vessel in the output +#' folder. These data.frame could be later bound into a big one using +#' bindAllMergedTable() +#' @author Francois Bastardie +#' @seealso \code{\link{vmsGridCreate}} +#' @references Bastardie et al. 2010. Fisheries Research +#' @examples +#' +#' +#' \dontrun{ +#' data(eflalo) +#' data(tacsat) +#' data(euharbours) +#' +#' # format +#' eflalo <- formatEflalo(eflalo) +#' tacsat <- formatTacsat(tacsat) +#' +#' # order tacsat chronologically with library(doBy) +#' tacsat <- sortTacsat(tacsat) +#' +#' # test each ping if in harbour or not +#' tacsat$SI_HARB <- NA +#' euharbours$Description <- euharbours$harbour +#' tacsat$SI_HARB <- pointInHarbour(lon=anf(tacsat$SI_LONG), +#' lat=anf(tacsat$SI_LATI), +#' harbours=euharbours, +#' rowSize=30, returnNames=TRUE) +#' inHarb <- tacsat$SI_HARB +#' inHarb <- replace(inHarb, !is.na(inHarb), 1) +#' inHarb <- replace(inHarb, is.na(inHarb), 0) +#' inHarb <- as.numeric(inHarb) +#' +#' # assign a trip identifier +#' tacsat$SI_FT <- 1 # init +#' idx <- which(inHarb==0) +#' tacsat[idx,"SI_FT"] <- cumsum(inHarb) [idx] # add a SI_FT index +#' +#' # keep 'out of harbour' points only +#' # (but keep the departure point and the arrival point lying in the harbour) +#' startTrip <- c(diff(tacsat[,"SI_FT"]), 0) +#' endTrip <- c(0, diff(tacsat[,"SI_FT"])) +#' tacsat[which(startTrip>0),"SI_FT"] <- tacsat[which(startTrip>0)+1,"SI_FT"] +#' tacsat[which(endTrip<0),"SI_FT"] <- tacsat[which(endTrip<0)-1,"SI_FT"] +#' tacsat <- tacsat[which(inHarb==0 | startTrip>0 | endTrip<0),] +#' +#' +#' # assign a state to each ping (here, useless if detectFishing at TRUE) +#' tacsat$SI_STATE <- 2 # init (1: 
fishing; 2: steaming) +#' # fake speed rule for fishing state +#' tacsat$SI_STATE [(tacsat$SI_SP>4 & tacsat$SI_SP<8)] <-1 +#' +#' +#' # reduce the size of the eflalo data by merging species +#' # (assuming that the other species is coded MZZ), threshold in euros. +#' eflalo2 <- poolEflaloSpecies (eflalo, threshold=1e6, code="MZZ") +#' +#' # debug if eflalo has not been cleaned earlier +#' eflalo <- eflalo[!eflalo$VE_REF=="NA" &!is.na(eflalo$VE_REF),] +#' +#' # an informed VE_FLT is also required +#' if(all(is.na(eflalo$VE_FLT))) eflalo$VE_FLT <- "fleet1" +#' +#' # possible mis-naming mistakes +#' if(!match('LE_MET_level6',colnames(eflalo))>0){ +#' eflalo$LE_MET_level6 <- eflalo$LE_MET +#' } +#' +#' # debug +#' eflalo <- eflalo[eflalo$LE_MET!="No_logbook6",] +#' +#' +#' # TEST FOR A GIVEN SET OF VESSELS +#' # (if detect.fishing is true then do also detection of fishing activity +#' # e.g. if speed='segment' the segmentTacsatSpeed() automatic detection of fishing states +#' # that will overwrite the existing SI_STATE) +#' mergeEflalo2Pings (eflalo=eflalo, tacsat=tacsat, vessels=c("738", "804"), +#' general=list(output.path=file.path("C:","output"), +#' visual.check=TRUE, detectFishing=TRUE, speed="segment", +#' what.speed="calculated")) +#' # ...OR APPLY FOR ALL VESSELS IN eflalo +#' mergeEflalo2Pings (eflalo=eflalo, tacsat=tacsat, +#' general=list(output.path=file.path("C:","output"), +#' visual.check=TRUE, detectFishing=TRUE, speed="segment", +#' what.speed="calculated")) +#' gc(reset=TRUE) +#' +#' # load the merged output table for one vessel +#' load(file.path("C:","output","merged_804_1800.RData")) +#' +#' # check the conservation of landings +#' sum(tapply(anf(merged$LE_KG_PLE), merged$flag, sum, na.rm=TRUE)) +#' sum(eflalo[eflalo$VE_REF=="804","LE_KG_PLE"], na.rm=TRUE) +#' +#' +#' # ...or bind all vessels (keeping only some given species here) +#' bindAllMergedTables (vessels=c("738", "804"), a.year = "1800", +#' species.to.keep=c("PLE","COD"), +#' folder = 
file.path("C:","output"), +#' all.in.one.table=TRUE) +#' +#' # ...and load the merged output table for all vessels +#' load(file.path("C:","output","all_merged__1800.RData")) +#' +#' # map landing of cod from all studied vessels +#' # ( with debugging if tacsat has not been cleaned earlier) +#' graphics.off() +#' df1<- all.merged[, c("SI_LATI","SI_LONG","LE_KG_COD")] +#' df1$SI_LONG <- anf(df1$SI_LONG) +#' df1$SI_LATI <- anf(df1$SI_LATI) +#' df1 <- df1[ !is.na(df1$SI_LATI),] +#' df1 <- df1[ !is.na(df1$SI_LONG),] +#' vmsGridCreate(df1,nameLon="SI_LONG", nameLat="SI_LATI", +#' nameVarToSum = "LE_KG_COD", cellsizeX =0.1, +#' cellsizeY =0.05, legendtitle = "COD landings (kg)") +#' +#' # but you need to remove steaming points before gridding! +#' df2<-df1[-which(is.na(df1$LE_KG_COD)),] +#' vmsGridCreate(df2,nameLon="SI_LONG",nameLat="SI_LATI", we = 3, ea = 6, so = 50, no = 54, +#' nameVarToSum = "LE_KG_COD",cellsizeX =0.1, +#' cellsizeY =0.05, legendtitle = "COD landings (kg)", plotPoints =TRUE, +#' breaks0=c(1,2,4,8,16,32,64,100000)) +#' +#' +#' +#' # CONVERT TO FISHFRAME FORMAT (might take some time running) +#' # (by default, this will keep all the species in the output table) +#' tmp <- bindAllMergedTables (vessels= unique(tacsat$VE_REF), +#' species.to.keep=character(), +#' folder = file.path("C:","output"), +#' all.in.one.table=FALSE) +#' +#' ff <- pings2Fishframe (general=list(output.path=file.path("C:","output"), +#' a.year=1800, a.country="NLD", degree=0.05 ) ) +#' +#' } +#' +#' +#' @export mergeEflalo2Pings +mergeEflalo2Pings <- + function(eflalo, tacsat, vessels=unique(eflalo$VE_REF), general=list(output.path=file.path("C:"), + visual.check=TRUE, detectFishing=FALSE, speed="segment", what.speed="calculated", conserve.all=TRUE, + ), ...){ + + lstargs <- as.list( sys.call() ) # equivalent to lstargs <- list(...) but suppress the r cmd build warning? 
+ + # create required folders for outputs + cat("if it still doesn't exist, 'results' folder is created in ",general$output.path,"\n") + dir.create(general$output.path, showWarnings = TRUE, recursive = TRUE, mode = "0777") + + + #!!!!!!!!!!!!!!!!!!!!!# + #!!!!!!!!!!!!!!!!!!!!!# + # utils-- + collapse.all.columns <- function (obj, columns= seq(ncol(obj)) ){ + eval(parse(text=paste('paste(obj[,', paste(columns,collapse='] ,"#", obj[,'), '],sep=\'\')', sep=''))) } + uncollapse.column <- function(obj, column="coll"){ + dd<- strsplit(as.character(obj[,column]),"#") ; nco <- length(dd[[1]]) ; dd<- unlist(dd) + res <- eval(parse(text=paste('data.frame(',paste('dd[seq(',1:nco,',nrow(obj)*nco,by=nco)]', collapse=','),')'))) + colnames(res) <- paste("col",1:nco,sep='') + return(res) + } + + #utils-- + # FUNCTION TO CREATE A SPATIAL GRID + # 'xx' have a 'SI_LATI' and a 'SI_LONG' columns + assignPointsToSpatialGrid <- function(xx){ + + xx <- xx[,!colnames(xx) %in% c("icessquare","icessquare.vms") ] # remove + xx <- cbind.data.frame(xx, icessquare= rep(0,nrow(xx))) + + + rlong <- range(anf(xx$SI_LONG),na.rm=TRUE) + vect.long <- signif(seq(floor(rlong[1]), ceiling(rlong[2]), by=1),4) # long (x) + label.long <- rep(paste(rep(LETTERS,each=10),0:9,sep=""),each=1) + names(label.long) <- signif(seq(-50, 209, by=1),4) # long (x) + label.long <- label.long[!is.na(names(label.long))] # => correspondance long (-50 to 209) / sq letter (A0 to Z9) + label.long <- label.long[as.character(vect.long)] + rlat <- range(anf(xx$SI_LATI), na.rm=TRUE) + vect.lat <- signif(seq(floor(rlat[1]), ceiling(rlat[2]),by=0.5),4) # lat (y) + label.lat <- rep(paste(seq(1,75,1)),each=1) + names(label.lat) <- paste(signif(seq(36,73, by=0.5),4)) + label.lat <- label.lat[!is.na(names(label.lat))] # => correspondance lat (36 to 73) / sq number (1 to 75) + label.lat <- label.lat[as.character(vect.lat)] + vect.label <- paste(rep(label.lat,each=length(label.long)),"",label.long,sep="") + xx[,"SI_RECT"] <- 
paste(label.lat [findInterval(anf(xx[,"SI_LATI"]), vect.lat)] , label.long [findInterval(anf(xx[,"SI_LONG"]), vect.long)], sep="") + + return(xx) + } + + + #!!!!!!!!!!!!!!!!!!!!!# + #utils-- + # for managing NA on logbook side + # (from vms trip.sq without corresponding logbook trip.sq e.g. because no declaration in sq because only steaming time inside) + # we need to inform back the specificity of the vessel from logbook using info from the same trip i.e. vesselid+FT_REF + retrieveOnBkSide <- function(merged, type.data){ + idx <- which(merged$LE_MET_level6=="NA") + merged.NA <- merged[idx,] # input (only the trip.sq with NA for the logbook part) + + for (td in type.data){ + map <- tapply(merged[, td ], paste(merged$VE_REF, merged$FT_REF), + function(i) {ss<- unique(as.character(i)) ; ss[ss!="NA"][1]}) + merged.NA[, td ] <- factor(paste(merged.NA$VE_REF,merged.NA$FT_REF)) + levels(merged.NA[, td ]) <- map[levels(merged.NA[, td ])] + } + if(nrow(merged.NA)>0) merged.NA$flag <- 4 # flag on meth + merged[idx,] <- merged.NA # output + return(merged) + } + + #!#!##!#!##!#!##!#!##!#!##!#!# + #!#!##!#!##!#!##!#!##!#!##!#!# + #!#!##!#!##!#!##!#!##!#!##!#!# + #!#!##!#!##!#!##!#!##!#!##!#!# + #!#!##!#!##!#!##!#!##!#!##!#!# + all.vesselid <- as.character(unique(eflalo[anf(eflalo$VE_LEN)>=0,]$VE_REF)) + all.vesselid <- all.vesselid[!is.na(all.vesselid)] # e.g. when VE_LEN at NA exists + if(length(vessels)!=0) all.vesselid <- vessels + # => IF ARG INFORMED, THEN KEEP ONLY ONE OR SEVERAL VESSELS AS NEEDED.... 
+ + for(a.vesselid in all.vesselid){ # PER VESSEL + cat(paste(a.vesselid,"\n", sep="" )) + + #---------- + #---------- + #---------- + #---------- + #---------- + #---------- + # LOGBOOK INPUT + logbk.this.vessel <- eflalo[eflalo$VE_REF %in% a.vesselid,] + logbk.this.vessel$LE_RECT <- factor(logbk.this.vessel$LE_RECT) + logbk.this.vessel$VE_REF <- factor(logbk.this.vessel$VE_REF) + logbk.this.vessel$VE_FLT <- factor(logbk.this.vessel$VE_FLT) + + + # automatic detection of a.year + general$a.year <- format(strptime( paste(logbk.this.vessel$FT_DDAT[1]) , tz='GMT', "%e/%m/%Y" ), "%Y") + + # departure time + logbk.this.vessel$LE_DTIME <- as.POSIXct( paste(logbk.this.vessel$FT_DDAT, logbk.this.vessel$FT_DTIME) , + tz='GMT', "%e/%m/%Y %H:%M" ) + # arrival time + logbk.this.vessel$LE_LTIME <- as.POSIXct( paste(logbk.this.vessel$FT_LDAT, logbk.this.vessel$FT_LTIME) , + tz='GMT', "%e/%m/%Y %H:%M" ) + # catch.date + logbk.this.vessel$LE_CTIME <- as.POSIXct( paste(logbk.this.vessel$LE_CDAT) , tz='GMT', "%e/%m/%Y" ) + + # mid time bk trips + LE_MIDTIME <- rep(NA, nrow(logbk.this.vessel)) + dep <- logbk.this.vessel$LE_DTIME +10 # we artificially add +10min because bug in R if mid-time is 00:00:00 + arr <- logbk.this.vessel$LE_LTIME +1 + for(r in 1:length(dep)){ + LE_MIDTIME[r] <- as.character(seq(from=dep[r], to=arr[r], length.out = 3)[2]) + } + logbk.this.vessel$LE_MIDTIME <- LE_MIDTIME + + if(!"FT_REF" %in% colnames(logbk.this.vessel) ) { + logbk.this.vessel$FT_REF <- factor(LE_MIDTIME) # init + levels(logbk.this.vessel$FT_REF) <- 1:length(logbk.this.vessel$FT_REF) # assign a FT_REF code + } # only if FT_REF is actually not already informed + + + + + #=> LOGBOOK (EFLALO) INPUT REQUIRES AT LEAST, + # 'VE_REF', FT_DDAT, FT_DTIME, FT_LDAT, FT_LTIME, FT_CDAT, + # 'LE_SP_KG' (etc.), 'LE_RECT', 'VE_FLT' AND 'LE_MET_level6', 'LE_GEAR' COLUMNS + # + + #---------- + #---------- + #---------- + #---------- + #---------- + #---------- + # VMS INPUT: load traj with 'at sea' pings 
SI_STATE informed + # ABSOLUTELY REQUIRED: c("VE_REF","SI_LATI","SI_LONG", "SI_DATE", "SI_TIME", "SI_FT", "SI_HARB", "SI_STATE") + + + if(a.vesselid %in% unique(tacsat$VE_REF)){ + + tacsat.this.vessel <- tacsat[tacsat$VE_REF == a.vesselid,] # subset for this vessel + tacsat.this.vessel$VE_REF <- factor(tacsat.this.vessel$VE_REF) + + + # if does not exist, add SI_DATIM for handling the time in R + tacsat.this.vessel$SI_TIME <- as.character(tacsat.this.vessel$SI_TIME) + tacsat.this.vessel[tacsat.this.vessel$SI_TIME=="24:00", "SI_TIME"] <- "00:00" # debug + tacsat.this.vessel[tacsat.this.vessel$SI_DATE=="29/02/1801", "SI_DATE"] <- "01/03/1801" # debug + tacsat.this.vessel$SI_DATIM <- as.POSIXct( paste(tacsat.this.vessel$SI_DATE, tacsat.this.vessel$SI_TIME) , + tz='GMT', "%d/%m/%Y %H:%M" ) + tacsat.this.vessel <- tacsat.this.vessel[!is.na(tacsat.this.vessel$SI_DATIM),] # debug e.g. when 29/02/1801 + + + # keep only the essential + vms.this.vessel <- tacsat.this.vessel [, c("VE_REF","SI_LATI","SI_LONG", + "SI_DATIM","SI_FT", "SI_SP", "SI_HE", "SI_HARB", "SI_STATE")] + rm(tacsat.this.vessel); gc(reset=TRUE) + vms.this.vessel$VE_REF <- factor(vms.this.vessel$VE_REF) + + vms.this.vessel$idx <- 1:nrow(vms.this.vessel) # label for each ping + + + + + # filter if vessel with a bad vms + to.remove.because.deficient.vms <- any(is.na(vms.this.vessel$SI_FT)) + to.remove.because.not.enough.vms.trips <- length(unique(vms.this.vessel$SI_FT))< 2 # nb vms trips < 2 + if(length(unique(vms.this.vessel$SI_FT))<2) warning('need more than 1 trip in SI_FT') + + # filter if vessel with a bad logbook + to.remove.because.pble.lgbk <- length(unique(logbk.this.vessel$FT_REF))< 2 # nb logbk trips < 2 + + # then... 
+ a.flag <- to.remove.because.deficient.vms || to.remove.because.not.enough.vms.trips || to.remove.because.pble.lgbk + + ## remove FT_REF and SI_MIDTIME if it exists + vms.this.vessel <- vms.this.vessel[, !colnames(vms.this.vessel) %in% c("FT_REF", "SI_MIDTIME")] + + + + if(a.flag==FALSE) { # i.e. vms-equipped + + if(all(is.na(vms.this.vessel$SI_STATE)) && general$detectFishing==FALSE) + stop('the SI_STATE column has to be informed before making the merging') + if(all(is.na(logbk.this.vessel$VE_FLT))) + stop('the VE_FLT column has to be informed before making the merging') + + # alias + .logbk <- logbk.this.vessel + .vms <- vms.this.vessel + + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + #! DO THE LINK - APPROACH 1 #!#!!#!#!#!#!#!#!#!# + #!#!#!#!#!#!#!#!!#!#!#!#!#!#!#!!#!#!#!#!#!#!#!#!# + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + NIELS <- FALSE + if(NIELS){ + eftim <- .logbk[which(duplicated(.logbk$FT_REF)==FALSE),c("LE_DTIME","LE_LTIME","FT_REF")] + dtime <- eftim[,1] + ltime <- eftim[,2] + stime <- .vms$SI_DATIM + tripn <- eftim[,3] + + + + smdtime <- t(outer(stime,dtime,"-")) + gtltime <- outer(ltime,stime,"-") + + #-Find first point where tacsat time is greater or equal to departure time and smaller than arrival time + st <- apply(smdtime,1,function(x){which(x>=0)[1]}) + en <- apply(gtltime,1,function(x){rev(which(x>=0))[1]}) + + #-Make sure that values are within the interval of departure and arrival time + subse <- which(is.na(st <= en) == FALSE & (st <= en) == TRUE) + + st <- st[subse] + en <- en[subse] + + #-Assign Tacsat data with FT_REF from Eflalo2 dataset where they link + + if(length(st)!=1){ + + idx <- unlist(mapply(seq,st,en,SIMPLIFY=FALSE)) + reps <- unlist(lapply(mapply(seq,st,en,SIMPLIFY=FALSE),length)) + .vms$FT_REF <- 0 + .vms$FT_REF[idx] <- rep(tripn[subse],reps) + } + if(length(st)==1){ + .vms$FT_REF <- 0 + .vms$FT_REF[seq(st,en)] <- rep(tripn[subse],length(seq(st,en))) + } + if(length(st)==0){ + + .vms$FT_REF <- 0 + } + + + } 
# end NIELS + + FRANCOIS <- TRUE + if(FRANCOIS){ + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + #! DO THE LINK - APPROACH 2 #!#!!#!#!#!#!#!#!#!# + #!#!#!#!#!#!#!#!!#!#!#!#!#!#!#!!#!#!#!#!#!#!#!#!# + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + + #!!!!!!!!!!!!!!!!!!# + #!!!!!!!!!!!!!!!!!!# + # -If IT DOES NOT EXIST YET-, + # FIND THE MID-TIME OF VMS TRIPS + if(any(colnames(.vms)%in%"SI_DATIM")){ + if(!any(colnames(.vms)%in%"SI_DTIME")){ + # find and add the first point of each trip + .vms$start.trip <- c(1,diff(.vms[,"SI_FT"])) + .vms$end.trip <- c(diff(.vms[,"SI_FT"]), .vms[nrow(.vms),"SI_FT"]) + .vms[.vms$start.trip>0, "start.trip"] <- .vms[.vms$start.trip>0, "SI_FT"] + .vms[.vms$end.trip>0, "end.trip"] <- .vms[.vms$end.trip>0, "SI_FT"] + + tmp <- .vms[.vms$start.trip>0,] + tmp <- tmp[,c("VE_REF","SI_DATIM","SI_FT")] + tmp2 <- .vms[.vms$end.trip>0,] + tmp2 <- tmp2[,c("VE_REF","SI_DATIM","SI_FT")] + .vms <- .vms[,!colnames(.vms) %in% c("start.trip", "end.trip")] # remove tool columns + table.midtime <- merge(tmp, tmp2, by.x="SI_FT", by.y="SI_FT") + table.midtime <- table.midtime[, c("SI_FT","VE_REF.x","SI_DATIM.x","SI_DATIM.y") ] + colnames(table.midtime) <- c("SI_FT","VE_REF","SI_DTIME","SI_ATIME") + } else{ + table.midtime <- .vms[, c("SI_FT","VE_REF","SI_DTIME","SI_ATIME") ] + table.midtime <- table.midtime[!duplicated(data.frame(table.midtime$SI_FT, table.midtime$VE_REF)),] + } + } else{stop("no 'SI_DATIM' found in vms")} + SI_MIDTIME <- rep(0, nrow(table.midtime)) + + for(r in 1: nrow(table.midtime)){ + SI_MIDTIME[r] <- as.character(seq(from=table.midtime$SI_DTIME[r], to=table.midtime$SI_ATIME[r], length.out = 3)[2]) + + } + table.midtime$SI_MIDTIME <- SI_MIDTIME + if(!any(colnames(.vms)%in%"SI_MIDTIME")){ # here we are... 
+ .vms <- merge(.vms, table.midtime[,c("SI_FT","SI_MIDTIME")], by.x="SI_FT", by.y="SI_FT") + } + + + + #!!!!!!!!!!!!!!!!!!# + #!!!!!!!!!!!!!!!!!!# + # ASSIGN A 'BK.TRIPNUM' FROM LOGBOOK TO EACH VMS TRIP + trunk <-1 # trunk give the part of the year to be plotted (1 to 5) + # visual check + if(general$visual.check){ + windows(width=8, height=4) + ltrunk <- (nrow(table.midtime)/5) + idxtrunk <- (trunk+(trunk-1)*ltrunk):(trunk*ltrunk) + # plot(table.midtime$SI_DTIME[idxtrunk],rep(1,length(table.midtime$SI_DTIME[idxtrunk])), + plot(table.midtime$SI_DTIME, rep(1,length(table.midtime$SI_DTIME)), + ylim=c(0,0.52), type="n", ylab="", axes=FALSE) + r <- as.POSIXct(round(range(table.midtime$SI_DTIME), "days")) + axis.POSIXct(1, at=seq(r[1], r[2], by="month"), format="%e%b%y:%H:%M") + axis(2, at=c(0.5,0.1),labels=c("VMS","LOGBOOK")) + + for(i in 1:nrow(table.midtime)) { + segments(as.POSIXct(table.midtime$SI_DTIME[i], tz='GMT'), 0.5, as.POSIXct(table.midtime$SI_ATIME[i], tz='GMT'), 0.5, col=1) + points(as.POSIXct(table.midtime$SI_MIDTIME[i], tz='GMT'), 0.5, col=1) + text(as.POSIXct(table.midtime$SI_MIDTIME[i], tz='GMT'), 0.52, table.midtime$SI_FT[i], cex=0.5, col=1) + + } + + tmp <- .logbk[, c("LE_DTIME","LE_LTIME", "LE_MIDTIME", "FT_REF")] + tmp <- tmp[!duplicated(tmp$LE_MIDTIME), ] + for(i in 1:nrow(tmp)){ + segments(as.POSIXct(tmp$LE_DTIME[i], tz='GMT'), 0.1, as.POSIXct(tmp$LE_LTIME[i], tz='GMT'), 0.1, col=1) + points(as.POSIXct(tmp$LE_MIDTIME[i], tz='GMT'), 0.1, col=1) + text(as.POSIXct(tmp$LE_MIDTIME[i], tz='GMT'), 0.0785, tmp$FT_REF[i], cex=0.5, col=1) + } + } + + + # THE CORE CODE: compare bk$LE_MIDTIME and vms$SI_MIDTIME + # find the nearest bk$LE_MIDTIME for each vms$SI_MIDTIME + # and then change levels + # (so, for each mid.time in vms, a FT_REF will be find) + # (so, no lines in vms without a FT_REF from bk...) 
+ fa1 <- levels(factor(.vms$SI_MIDTIME)) + new.levels <- fa1 + fa2 <- levels(factor(.logbk$LE_MIDTIME)) + for(i in 1:length(fa1)) { # for each level in vms + tmp <- abs(as.numeric( as.POSIXct(fa2, tz='GMT') - as.POSIXct(fa1, tz='GMT')[i] )) + if(all(is.na(tmp))) tmp <- abs(as.numeric( as.Date(fa2) - as.Date(fa1)[i] )) # debug the R bug in case of mid-time at 00:00 hour + new.levels[i] <- fa2 [which(tmp == min(tmp, na.rm=TRUE) )] # find the nearest level in logbook + } + .vms$SI_MIDTIME <- factor(as.character(.vms$SI_MIDTIME)) + sauv <- .vms$SI_MIDTIME + levels(.vms$SI_MIDTIME) <- new.levels # and change mid.time in vms to force the merging + + # finally, replace levels by the FT_REF + tmp <- .logbk[.logbk$LE_MIDTIME %in% .vms$SI_MIDTIME , c("FT_REF","LE_MIDTIME")] + tmp2 <- tmp[!duplicated(tmp$FT_REF),] + idx <- match( levels(.vms$SI_MIDTIME), tmp2$LE_MIDTIME ) + .vms$FT_REF <- .vms$SI_MIDTIME # init + levels(.vms$FT_REF) <- as.character(tmp2$FT_REF ) [idx] + + + if(general$visual.check){ + for(i in 1: nrow(.vms)) { + arrows(as.POSIXct( sauv[i]), 0.5 ,as.POSIXct( .vms$SI_MIDTIME[i], tz='GMT'),0.1, length=0.1) + } + } + + if(general$visual.check){ + ve <- as.character(.logbk$VE_REF[1]) + savePlot(filename = file.path(general$output.path, + paste("assign_eflalo_tripnum_to_vms_",ve,"_",general$a.year,".jpeg",sep="")),type ="jpeg") + dev.off() + } + + + ## ADD A WARNING IN CASE OF LONG (UNREALISTIC) TRIPS ## + diff.date <- table.midtime$SI_ATIME - table.midtime$SI_DTIME # if at least one trip >30 days + if(attributes(diff.date)$units=="secs") idx <- which((((diff.date)/3600)/24) >30) + if(attributes(diff.date)$units=="hours") idx <- which((((diff.date)/1)/24) >30) + attributes((table.midtime$SI_ATIME - table.midtime$SI_DTIME )) + if (length( idx) >0){ + cat(paste("at least one vms trip > 30 days detected! 
check harbours...", "\n", sep="")) + suspicious <- .vms[.vms$SI_FT %in% table.midtime$SI_FT[idx] ,] + tmp <- table(suspicious$SI_LATI) + lat.suspicious <- names(tmp[tmp>5]) + if(length(lat.suspicious)!=0) cat(paste("potential harbour likely near lat ",lat.suspicious,"\n",sep="")) + tmp <- table(suspicious$SI_LONG) + long.suspicious <- names(tmp[tmp>5]) + if(length(long.suspicious)!=0) cat(paste("potential harbour likely near long ",long.suspicious,"\n",sep="")) + } # if at least one trip >30 days + rm(table.midtime) ; gc(reset=TRUE) + + + + .logbk$LE_MIDTIME <- factor(.logbk$LE_MIDTIME) + .logbk$FT_REF <- factor(.logbk$FT_REF) + .vms$SI_MIDTIME <- factor(.vms$SI_MIDTIME) + .vms$FT_REF <- factor(.vms$FT_REF) + + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + #! ASSIGN A 'SI_FT' FROM VMS TRIP NUM TO #!#!#!# + #! LOGBOOK TRIPS WITH NO VMS CORRESPONDANCE #!#!# + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + dep.bk.not.in.vms <- unique( .logbk$FT_REF [ !( .logbk$FT_REF %in% .vms$FT_REF ) ] ) + if(length(dep.bk.not.in.vms)!=0){ + # bk tripnum from dep not in vms + idx <- .logbk$FT_REF %in% dep.bk.not.in.vms + bk <- .logbk[idx,] [order(.logbk[idx,]$LE_DTIME),] + if(!"SI_MIDTIME" %in% colnames(.vms)){ + vms <- .vms [order(.vms$SI_DTIME),] + SI_MIDTIME <- rep(NA, nrow(vms)) + for(r in 1: nrow(vms)){ + SI_MIDTIME[r] <- as.character(seq(from=vms$SI_DTIME[r], to=vms$SI_ATIME[r], length.out = 3)[2]) + } + vms$SI_MIDTIME <- SI_MIDTIME + } else{ vms <- .vms[order(.vms$SI_MIDTIME),]} + #1- compare bk$mid.time and vms$mid.time + # find the nearest vms$mid.time for each bk$mid.time + # and then change levels + # (so for each mid.time in bk, a tripnum will be find) + # (so no lines in bk without a tripnum...) 
+ fa1 <- levels(factor(bk$LE_MIDTIME)) + new.levels <- fa1 + fa2 <- levels(factor(vms$SI_MIDTIME)) + for(i in 1:length(fa1)) { # for each level in logbk + tmp <- abs(as.numeric( as.POSIXct(fa2, tz='GMT') - as.POSIXct(fa1, tz='GMT')[i] )) + new.levels[i] <- fa2 [which(tmp == min(tmp, na.rm=TRUE) )] # find the nearest level in vms + } + bk$LE_MIDTIME <- factor(as.character(bk$LE_MIDTIME)) + levels(bk$LE_MIDTIME) <- new.levels # and change mid.time in logbk to force the merging + + # finally, replace levels by the tripnum + # (note: a same FT_REF in vms can have different mid.time + # due to the first merging of vms to logbk in the vms analysis) + tmp <- vms[vms$SI_MIDTIME %in% bk$LE_MIDTIME , c("FT_REF","SI_MIDTIME")] + tmp2 <- tmp[!duplicated(data.frame(tmp$FT_REF, tmp$SI_MIDTIME)),] + idx2 <- match(levels(bk$LE_MIDTIME), tmp2$SI_MIDTIME) + bk$FT_REF <- bk$LE_MIDTIME # init + levels(bk$FT_REF) <- as.character(tmp2$FT_REF) [idx2] + + # output + bk$LE_MIDTIME <- as.character(bk$LE_MIDTIME) + bk$FT_REF <- as.character(bk$FT_REF) + .logbk[idx,][order(.logbk[idx,]$LE_DTIME),] <- bk + } + + + } # end FRANCOIS + + + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + # ASSIGN A RECTANGLE TO EACH PING #!#!#!#!#!#!#!# + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + .vms <- assignPointsToSpatialGrid(xx=.vms) + + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + # COMPUTE EFFORT.MINS !#!#!#!#!#!#!#!#!#!#!# + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + # vms + .vms <- .vms[order(.vms$SI_DATIM),] + .vms$LE_EFF_VMS <- abs(c(0, as.numeric(.vms[-nrow(.vms),"SI_DATIM"] - + .vms[-1,"SI_DATIM"], units="mins"))) + start.trip <- c(1,diff(.vms[,"SI_FT"])) + .vms[start.trip!=0, "LE_EFF_VMS"] <- 0 # just correct for the trip change points + # logbook (start/end of trip)- caution: will be repeated per ping in the merged output + #SI_DATIM <- as.POSIXct(paste(.logbk$FT_DDAT,.logbk$FT_DTIME,sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") + #SI_DATIM2 <- 
as.POSIXct(paste(.logbk$FT_LDAT,.logbk$FT_LTIME,sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") + #.logbk$LE_EFF <- an(difftime(SI_DATIM2,SI_DATIM,units="mins")) + + + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + # ASSIGN FISHING/NON-FISHING (optional)!#!#!#!#!# + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + if(general$detectFishing && general$speed=="segment") { + ## add a gear form tacsat from the logbook info (after the first merging) + ## because the assignement of a state is gear-specific. + ## caution here: we assume only one gear used inside a trip... + # because note that we remove 'logevent' and keep only one duplicate of tripnum + .vms$LE_GEAR <- factor(.vms$FT_REF) # init + tmp <- .logbk[,c("LE_GEAR","FT_REF")] + tmp <- tmp[!duplicated(tmp$FT_REF),] #remove logevent and keep only one duplicate of tripnum + tmp <- tmp[tmp$FT_REF %in% unique(.vms$LE_GEAR),] + idx <- match(levels(.vms$LE_GEAR), as.character(tmp$FT_REF)) + dd <- as.character(tmp$LE_GEAR) [idx] + dd <- replace(dd, is.na(dd), "UKN") # unknown because not matched if Niels + levels(.vms$LE_GEAR) <- dd + + # then do the assignement of the state + # according to a segmented regression on the speed histogram + .vms <- segmentTacsatSpeed ( + tacsat=.vms, + vessels=a.vesselid, + force.lower.bound=0.5, # to do: create an arg in the parent function instead... + gears.to.force= c('GNS','GND','GNC','GNF','GTR','GTN','GEN','GN','SDN','SSC'), # to do: create an arg in the parent function instead... 
+ general=list(a.year=general$a.year, + output.path=general$output.path, + what.speed=general$what.speed, + visual.check=TRUE + ) + ) + #=> (semi)automatic detection of the fishing peak + # (put here because the LE_GEAR need to be informed) + # alternatively, + if(FALSE){ + .vms$SI_STATE <- 0 + .vms <- segmentedTacsatSpeed(.vms[.vms$VE_REF==a.vesselid,], units="year", analyse.by="LE_GEAR", + speed="instantaneous", logfit=FALSE, CI=0.95) + .vms$SI_STATE <- .vms$SI_STATE+1 # back compatibility with mergeEflalo2Pings() i.e. 1: fishing, 2: steaming + # ...but what about the passive gears then? + } + + + .vms <- .vms[, !colnames(.vms) %in% "LE_GEAR"] # remove after use to avoid future conflict. + } + # some alternatives TO DO: + #if(general$detectFishing && general$speed=="lookuptable") + # .vms <- lookupSpeedTacsat (tacsat=.vms, vessels=a.vesselid) + #if(general$detectFishing && general$speed=="bayesian") + # .vms <- bayesianFiltering (tacsat=.vms, vessels=a.vesselid) + + + + rm(er); rm(xx) ; gc(reset=TRUE) + + + + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + # MERGING WITH VMS PER TRIP !!!!!!!!!!#!#!#!#!#!# + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + do.merging <- function(method="FT_REF", .logbk, .vms, general){ + + # IF BY PING------------- + # find total nb of FISHING ping per tripnum from vms # used for method 1 'FT_REF' + if(method=="FT_REF"){ + # .vms$count.fping.trip <- factor(.vms$FT_REF) # init + # count.fping.trip <- table(.vms[.vms$SI_STATE==1,]$FT_REF) + # # => COUNT nb of FISHING pings per FT_REF because each weight will be repeated by ping after merging + # levels(.vms$count.fping.trip) <- count.fping.trip[levels(.vms$count.fping.trip)] # mapping + # .vms[.vms$SI_STATE==2,]$count.fping.trip <- NA + + # => COUNT nb of FISHING pings per FT_REF because each weight will be repeated by ping after merging + if(any(.vms$SI_STATE==1)){ + .vms$count.fping.trip <- factor(.vms$FT_REF) # init + countp <- countPings(~VE_REF+FT_REF, 
.vms[.vms$SI_STATE=="1",]) + rownames(countp) <- countp$FT_REF + levels(.vms$count.fping.trip) <- countp[levels(.vms$count.fping.trip),"pings"] # mapping + if(any(.vms$SI_STATE %in% 2)) .vms[.vms$SI_STATE==2,]$count.fping.trip <- NA + } else{.vms$count.fping.trip <- NA} + + + # => COUNT nb of gears per FT_REF because each ping will be repeated by gear after merging + count.gr.trip <- tapply(.logbk$LE_GEAR, .logbk$FT_REF, function(x) length(unique(x))) + .logbk$count.gr.trip <- count.gr.trip[.logbk$FT_REF] # mapping + + } + + + # find total nb of FISHING ping per trip-icessquare from vms # used for method 2 'FT_REF_SQ' + if(method=="FT_REF_SQ"){ + # .vms$count.fping.trip.sq <- factor(.vms$FT_REF_SQ) # init + # count.fping.trip.sq <- table(.vms[.vms$SI_STATE==1,]$FT_REF_SQ) # COUNT nb of FISHING pings per FT_REF_SQ + # levels(.vms$count.fping.trip.sq) <- count.fping.trip.sq[levels(.vms$count.fping.trip.sq)] # mapping + # if(any('2' %in% unique(.vms$SI_STATE))) .vms[.vms$SI_STATE==2,]$count.fping.trip.sq <- NA + + if(any(.vms$SI_STATE==1)){ + .vms$count.fping.trip.sq <- factor(.vms$FT_REF_SQ) # init + countp <- countPings(~VE_REF+SI_RECT+FT_REF, .vms[.vms$SI_STATE=="1",]) + rownames(countp) <- interaction(countp$FT_REF,countp$SI_RECT) + levels(.vms$count.fping.trip.sq ) <- countp[levels(.vms$count.fping.trip.sq),"pings"] # mapping + if(any('2' %in% unique(.vms$SI_STATE))) .vms[.vms$SI_STATE==2,]$count.fping.trip.sq <- NA + } else{.vms$count.fping.trip.sq <- NA} + + + # => COUNT nb of gears per FT_REF_SQ because each ping will be repeated by gear after merging + count.gr.trip.sq <- tapply(.logbk$LE_GEAR, .logbk$FT_REF_SQ, function(x) length(unique(x))) + .logbk$count.gr.trip.sq <- count.gr.trip.sq[.logbk$FT_REF_SQ] # mapping + } + + + # find total nb of FISHING ping per trip-icessquare-day from vms # used for method 3 'FT_REF_SQ_DAY' + if(method=="FT_REF_SQ_DAY"){ +# .vms$count.fping.trip.sq.day <- factor(.vms$FT_REF_SQ_DAY) # init +# count.fping.trip.sq.day <- 
table(.vms[.vms$SI_STATE==1,]$FT_REF_SQ_DAY) # COUNT nb of FISHING pings per FT_REF_SQ_DAY +# levels(.vms$count.fping.trip.sq.day) <- count.fping.trip.sq.day[levels(.vms$count.fping.trip.sq.day)] # mapping +# if(any('2' %in% unique(.vms$SI_STATE))) .vms[.vms$SI_STATE==2,]$count.fping.trip.sq.day <- NA + + + if(any(.vms$SI_STATE==1)){ + .vms$count.fping.trip.sq.day <- factor(.vms$FT_REF_SQ_DAY) # init + countp <- countPings(~VE_REF+day+SI_RECT+FT_REF, .vms[.vms$SI_STATE=="1",]) + rownames(countp) <- interaction(countp$FT_REF, countp$SI_RECT, countp$SI_DAY) + levels(.vms$count.fping.trip.sq.day) <- countp[levels(.vms$count.fping.trip.sq.day),"pings"] # mapping + if(any('2' %in% unique(.vms$SI_STATE))) .vms[.vms$SI_STATE==2,]$count.fping.trip.sq.day <- NA + } else{.vms$count.fping.trip.sq.day <- NA} + + + # => COUNT nb of gears per FT_REF_SQ_DAY because each ping will be repeated by gear after merging + count.gr.trip.sq.day <- tapply(.logbk$LE_GEAR, .logbk$FT_REF_SQ_DAY, function(x) length(unique(x))) + .logbk$count.gr.trip.sq.day <- count.gr.trip.sq.day[.logbk$FT_REF_SQ_DAY] # mapping} + } + + + + # do the merging between .logbk and .vms according to + # meth1: 'FT_REF' OR meth2: 'FT_REF_SQ' OR meth3: 'FT_REF_SQ_DAY' + # need to use a trick to avoid "out of memory" doing the merge() + coln.idx1 <- which(!colnames(.logbk)%in%c("VE_REF", method)) + coln1 <- colnames(.logbk)[coln.idx1] + tmp1 <- data.frame(coll= collapse.all.columns (.logbk, columns= coln.idx1 ), + VE_REF=.logbk$VE_REF, a.method= .logbk[,method] ) #.logbk + coln.idx2 <- which(!colnames(.vms)%in%c("VE_REF", method)) + coln2 <- colnames(.vms)[coln.idx2] + tmp2 <- data.frame(coll2= collapse.all.columns (.vms, columns= coln.idx2 ), + VE_REF=.vms$VE_REF, a.method= .vms[,method] ) #.vms + tmp1[,"a.method"] <- factor(tmp1[,"a.method"] ) + tmp2[,"a.method"] <- factor(tmp2[,"a.method"] ) + + merged.this.vessel <- merge(tmp1, tmp2, all.x=TRUE, all.y=TRUE, suffixes = c(".bk",".vms")) + #=> so, with all.y = TRUE, 
the vms records without corresponding logbk records are kept and NA are produced on the logbook part + #=> so, with all.x = TRUE, the logbk records without corresponding vms records are kept and NA are produced on the vms part + merged.this.vessel$coll <- replace(as.character(merged.this.vessel$coll),is.na(merged.this.vessel$coll), paste(rep("NA",length(coln1)),collapse="#")) + merged.this.vessel$coll <- factor(merged.this.vessel$coll) + #=> adapt 'coll' to get a vector of NA (NA in case of 'in vms but not in logbook') + merged.this.vessel$coll2 <- replace(as.character(merged.this.vessel$coll2),is.na(merged.this.vessel$coll2), paste(rep("NA",length(coln2)),collapse="#")) + # adapt 'coll2' to get a vector of NA (NA in case of 'in logbook but not in vms') + merged.this.vessel$coll2 <- factor(merged.this.vessel$coll2) + colnames(merged.this.vessel)[colnames(merged.this.vessel)%in%"a.method"] <- method + + tmp3 <- uncollapse.column(merged.this.vessel, column="coll") # logbk + tmp4 <- uncollapse.column(merged.this.vessel, column="coll2") # vms + tmp5 <- cbind.data.frame(merged.this.vessel[,c("VE_REF", method)], tmp3, tmp4) + colnames(tmp5) <- c("VE_REF", method, coln1, coln2) + merged.this.vessel <- tmp5 + + # we can choose to correct to keep the land. weight: + # the loss in weight will come from the matching records having catches but + # without fishing pings (i.e. only steaming pings)! + if(is.null(general$conserve.all)) general$conserve.all <- FALSE + if(general$conserve.all){ + # do the conservation of landings anyway? + # detect possible weight landed while no feffort detected from vms + # find FT_REF with some NA + vv<- anf(unique(merged.this.vessel[merged.this.vessel$count.fping.trip=="NA","FT_REF"])) + # then, find FT_REF with at least one no NA + no.vv<- anf(unique(merged.this.vessel[merged.this.vessel$count.fping.trip!="NA","FT_REF"])) + tripnum.all.na.inside <- vv[!vv%in%no.vv] # trip num without at least one count.fping! 
+ # so, deduce loss in weight + zz<- merged.this.vessel[merged.this.vessel$FT_REF %in% tripnum.all.na.inside,] + + if(method=="FT_REF"){ + # in this case, reallocate evenly between all pings (caution: including steaming pings) + merged.this.vessel[,"count.fping.trip"] <- anf(merged.this.vessel[,"count.fping.trip"]) + merged.this.vessel$FT_REF <- factor( merged.this.vessel$FT_REF) + nbpings.per.trip <- unlist(lapply(split(merged.this.vessel[merged.this.vessel$FT_REF %in% tripnum.all.na.inside,], + merged.this.vessel[merged.this.vessel$FT_REF %in% tripnum.all.na.inside,]$FT_REF),nrow)) + merged.this.vessel[merged.this.vessel$FT_REF %in% tripnum.all.na.inside, "count.fping.trip"] <- rep(nbpings.per.trip,nbpings.per.trip ) + merged.this.vessel[merged.this.vessel$FT_REF %in% tripnum.all.na.inside, "flag"] <- 5 + } + } # end conserve.all + + + + + # apply the catches re-distribution + # method 1, 2 and 3: per ping + # PER PING: + # ASSUMING EQUAL ALLOCATION BETWEEN FISHING PINGS AND GEARS USE INSIDE A SAME TRIP + nm <- names(merged.this.vessel) + idx.col.w <- grep('KG', nm) # index columns with species weight + idx.col.v <- grep('EURO', nm) # index columns with species value + idx.col <- c(idx.col.w, idx.col.v) + if(method=="FT_REF_SQ_DAY"){ + merged.this.vessel[,idx.col] <- (apply(merged.this.vessel[,idx.col],2,anf) / + anf(merged.this.vessel$count.fping.trip.sq.day)) / + anf(merged.this.vessel$count.gr.trip.sq.day) + } + if(method=="FT_REF_SQ"){ + merged.this.vessel[,idx.col] <- (apply(merged.this.vessel[,idx.col],2,anf) / + anf(merged.this.vessel$count.fping.trip.sq)) / + anf(merged.this.vessel$count.gr.trip.sq) + } + if(method=="FT_REF"){ + merged.this.vessel[,idx.col] <- (apply(merged.this.vessel[,idx.col],2,anf) / + anf(merged.this.vessel$count.fping.trip) ) / + anf(merged.this.vessel$count.gr.trip) + } + + + + + return(merged.this.vessel) + } + + + + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + # SET UP PRIMARY KEYS FOR MERGING!#!#!#!#!#!#!#!# + 
#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + .logbk$FT_REF_SQ_DAY <- factor(paste(.logbk$FT_REF, ".", .logbk$LE_RECT,".", an(format(.logbk$LE_CTIME, '%j')), sep='')) + .vms$FT_REF <- factor(.vms$FT_REF) + .vms$FT_REF_SQ <- factor(paste(.vms$FT_REF, ".", .vms$SI_RECT, sep='')) + .vms$FT_REF_SQ_DAY <- factor(paste(.vms$FT_REF, ".", .vms$SI_RECT,".", an(format(.vms$SI_DATIM, '%j')), sep='')) + + + + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + # AGGREGATE WEIGHT PER SPECIES !#!#!#!#!#!#!#!#!# + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + nm <- names(.logbk) + idx.col.w <- grep('KG', nm) # index columns with species weight + idx.col.v <- grep('EURO', nm) # index columns with species value + idx.col <- c(idx.col.w, idx.col.v) + + DT <- data.table(.logbk) # library data.table for fast grouping replacing aggregate() + # AGGREGATE WEIGHT (OR VALUE) PER SPECIES PER FT_REF_SQ_DAY (NOTE: SO, 'LE_SEQNUM' IS AGGREGATED HERE) + eq1 <- c.listquote(paste("sum(",nm[idx.col],",na.rm=TRUE)",sep="")) + .logbk <- DT[,eval(eq1),by=list(FT_REF_SQ_DAY,VE_REF,VE_FLT,VE_KW,LE_MET_level6,LE_GEAR)] + .logbk <- data.frame(.logbk) + colnames(.logbk) <- c("FT_REF_SQ_DAY","VE_REF","VE_FLT","VE_KW","LE_MET_level6","LE_GEAR",nm[idx.col]) + + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + # MERGING PROCEDURE CHOICE !#!#!#!#!#!#!#!#!#!#!# + #!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!#!# + + my.split <- function(obj,a.sep="\\.",idx=1) unlist(lapply(strsplit(obj, a.sep),function(x)x[idx])) + # reduce the level + .logbk$FT_REF_SQ <- factor(paste(my.split(as.character(.logbk$FT_REF_SQ_DAY),a.sep="\\.",idx=1), + my.split(as.character(.logbk$FT_REF_SQ_DAY),a.sep="\\.",idx=2),sep='.')) + # reduce the level + .logbk$FT_REF <- factor(my.split(as.character(.logbk$FT_REF_SQ),a.sep="\\.",idx=1)) + + # find common keys + tripnum.sq.day.in.vms.and.in.bk <- .vms$FT_REF_SQ_DAY [.vms$FT_REF_SQ_DAY %in% .logbk$FT_REF_SQ_DAY] + tripnum.sq.in.vms.and.in.bk <- .vms$FT_REF_SQ [.vms$FT_REF_SQ 
%in% .logbk$FT_REF_SQ] + .vms.in.bk <- .vms[ .vms$FT_REF_SQ_DAY %in% tripnum.sq.day.in.vms.and.in.bk,] + .vms.in.bk2 <- .vms[ !(.vms$FT_REF_SQ_DAY %in% tripnum.sq.day.in.vms.and.in.bk) & + .vms$FT_REF_SQ %in% tripnum.sq.in.vms.and.in.bk,] + in.bk.and.feffort.not.at.0 <- unique(.vms.in.bk[.vms.in.bk$SI_STATE==1,]$FT_REF_SQ_DAY) + in.bk2.and.feffort.not.at.0 <- unique(.vms.in.bk2[.vms.in.bk2$SI_STATE==1,]$FT_REF_SQ) + + # split .vms and .logbk in three blocks + # vms with good match => go to meth3 + .vms.for.meth3 <- .vms [.vms$FT_REF_SQ_DAY %in% in.bk.and.feffort.not.at.0, ] + # vms with intermediate match => go to meth2 + .vms.for.meth2 <- .vms [!(.vms$FT_REF_SQ_DAY %in% in.bk.and.feffort.not.at.0) & + (.vms$FT_REF_SQ %in% in.bk2.and.feffort.not.at.0), ] + # vms with bad match => go to meth1 + .vms.for.meth1 <- .vms [!(.vms$FT_REF_SQ_DAY %in% in.bk.and.feffort.not.at.0) & + !(.vms$FT_REF_SQ %in% in.bk2.and.feffort.not.at.0), ] + # logbk with good match => go to meth3 + .logbk.for.meth3 <- .logbk [.logbk$FT_REF_SQ_DAY %in% in.bk.and.feffort.not.at.0, ] + # logbk with intermediate match => go to meth2 + .logbk.for.meth2 <- .logbk [!(.logbk$FT_REF_SQ_DAY %in% in.bk.and.feffort.not.at.0) & + (.logbk$FT_REF_SQ %in% in.bk2.and.feffort.not.at.0), ] + # logbk with bad match => go to meth1 + .logbk.for.meth1 <- .logbk [!(.logbk$FT_REF_SQ_DAY %in% in.bk.and.feffort.not.at.0) & + !(.logbk$FT_REF_SQ %in% in.bk2.and.feffort.not.at.0), ] + + suppressWarnings(rm(merged1, merged2, merged3)) # clear + #!! 
METH1 !!# + if(nrow(.logbk.for.meth1)!=0 && nrow(.vms.for.meth1)!=0 ) { + # remove useless cols and aggregate according to the key 'FT_REF' + .logbk.for.meth1 <- .logbk.for.meth1[, !colnames(.logbk.for.meth1)%in% c("FT_REF_SQ_DAY","FT_REF_SQ")] + nm <- names(.logbk.for.meth1) + idx.col.w <- grep('KG', nm) # index columns with species weight + idx.col.v <- grep('EURO', nm) # index columns with species value + idx.col <- c(idx.col.w, idx.col.v) + # AGGREGATE WEIGHT (OR VALUE) PER SPECIES PER FT_REF + DT <- data.table(.logbk.for.meth1) # library data.table for fast grouping replacing aggregate() + eq1 <- c.listquote(paste("sum(", nm[idx.col],",na.rm=TRUE)",sep="")) + .logbk.for.meth1 <- DT[,eval(eq1),by=list(VE_REF,FT_REF,VE_FLT,VE_KW,LE_MET_level6,LE_GEAR)] + .logbk.for.meth1 <- data.frame(.logbk.for.meth1) + colnames(.logbk.for.meth1) <- c("VE_REF","FT_REF","VE_FLT","VE_KW","LE_MET_level6","LE_GEAR",nm[idx.col]) + # do.merging + merged1 <- do.merging(method="FT_REF", .logbk.for.meth1, .vms.for.meth1, general) + # add meth flag + if("flag" %in% names(merged1) && nrow(merged1[is.na(merged1[,"flag"]),])!=0){ + merged1[is.na(merged1[,"flag"]),"flag"] <- 1 # meth 1 + } else merged1$flag <- 1 + } + #!! 
METH2 !!# + if(nrow(.logbk.for.meth2)!=0 && nrow(.vms.for.meth2)!=0 ) { + # remove useless cols and aggregate according to the key 'FT_REF_SQ' + .logbk.for.meth2 <- .logbk.for.meth2[, !colnames(.logbk.for.meth2)%in% c("FT_REF_SQ_DAY","FT_REF")] + nm <- names(.logbk.for.meth2) + idx.col.w <- grep('KG', nm) # index columns with species weight + idx.col.v <- grep('EURO', nm) # index columns with species value + idx.col <- c(idx.col.w, idx.col.v) + # AGGREGATE WEIGHT (OR VALUE) PER SPECIES PER FT_REF_SQ + DT <- data.table( .logbk.for.meth2) # library data.table for fast grouping replacing aggregate() + eq2 <- c.listquote(paste("sum(",nm[idx.col],",na.rm=TRUE)",sep="")) + .logbk.for.meth2 <- DT[,eval(eq2),by=list(VE_REF,FT_REF_SQ,VE_FLT,VE_KW,LE_MET_level6,LE_GEAR)] + .logbk.for.meth2 <- data.frame(.logbk.for.meth2) + colnames(.logbk.for.meth2) <- c("VE_REF","FT_REF_SQ","VE_FLT","VE_KW","LE_MET_level6","LE_GEAR",nm[idx.col]) + # do.merging + merged2 <- do.merging(method="FT_REF_SQ", .logbk.for.meth2, .vms.for.meth2, general) + # add meth flag + merged2$flag <- 2 # meth 2 + } + #!! METH3 !!# + if(nrow(.logbk.for.meth3)!=0 && nrow(.vms.for.meth3)!=0 ) { + # do.merging + merged3 <- do.merging(method="FT_REF_SQ_DAY", .logbk.for.meth3, .vms.for.meth3, general) + # add meth flag + merged3$flag <- 3 # meth 3 + } + + # bind the three blocks + merged <- NULL ; colnm <- NULL + for(i in 1: 3){ + a.table <- try(get(paste('merged',i,sep='')), silent=TRUE) + if(class(a.table)!="try-error"){ + a.table <- a.table[, !colnames(a.table) %in% + c("count.fping.trip.sq.day","count.fping.trip.sq","count.fping.trip", + "tot.fish.effort.trip","tot.fish.effort.trip.sq", + "count.gr.trip", "count.gr.trip.sq", "count.gr.trip.sq.day", + "FT_REF_SQ", "FT_REF_SQ_DAY")] # remove tool columns + if(i==1) colnm <- colnames(a.table) ; if(is.null(colnm)) colnm <- colnames(a.table) + merged <- rbind.data.frame (merged, a.table[, colnm]) + } + } + + # if still 'not merging' part, retrieve on NA side i.e. 
occurs when pings in vms but not in bk + merged <- retrieveOnBkSide(merged, type.data=c( "VE_FLT","VE_KW","LE_GEAR", "LE_MET_level6")) # i.e. when metier=='NA' + + + # clean up + rm(a.table, merged1, merged2, merged3, merged.this.vessel,.vms, .logbk, logbk.this.vessel, vms.this.vessel) + gc(reset=TRUE) + + # restore tacsat names "%e/%m/%Y %H:%M" + idx <- merged$SI_DATIM!='NA' # NA is possible when bk not in vms because bk.tripnum vms may belong to another block than block1 + merged$SI_DATIM <- as.character(merged$SI_DATIM) + merged$SI_DATE <- NA + merged[idx,"SI_DATE"] <- paste(substr(merged[idx,]$SI_DATIM ,9,10),"/", + substr(merged[idx,]$SI_DATIM , 6,7), "/", substr(merged[idx,]$SI_DATIM ,1,4), sep='') + merged$SI_TIME <- NA + merged[idx,"SI_TIME"] <- paste(substr(merged[idx,]$SI_DATIM , 12,13),":", + substr(merged[idx,]$SI_DATIM , 15,16), sep='') + + # last calculation + merged$KW_HOURS <- anf(merged$VE_KW) * anf(merged$LE_EFF_VMS) /60 + + # order chronologically + merged <- orderBy(~SI_DATIM, merged) + + # last clean up + merged <- merged[, !colnames(merged) %in% c('idx', 'icessquare', "SI_DATIM", "SI_MIDTIME")] + + # save------------ + save("merged", file=file.path(general$output.path, + paste("merged_", a.vesselid,"_",general$a.year,".RData", sep=''))) + cat(paste("save 'merged'...OK\n\n",sep="")) + + + }else{ # end 'a.flag' + cat(paste("failure for",a.vesselid,"(probably not vms-equipped)\n")) + # because no vms for this vessel... + # TO DO: the logbk way + #... + } + }else{ # end try-error + cat(paste("failure for",a.vesselid,"(probably not vms-equipped)\n")) + # because no vms for this vessel... + # TO DO: the logbk way + #... 
+ } + + + + + } # end a.vesselid + + + + + + +return() +} + + + + + + + ##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## + ##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## + ##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## + ##!!!!!MAIN!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## + ##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## + ##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## + ##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## + if(FALSE) { + + + #\dontrun{ + data(eflalo) + data(tacsat) + data(euharbours); euharbours <- harbours + + # format + eflalo <- formatEflalo(eflalo) + tacsat <- formatTacsat(tacsat) + + # order tacsat chronologically with library(doBy) + tacsat <- sortTacsat(tacsat) + + # test each ping if in harbour or not + tacsat$SI_HARB <- NA + euharbours$Description <- euharbours$harbour + tacsat$SI_HARB <- pointInHarbour(lon=anf(tacsat$SI_LONG), + lat=anf(tacsat$SI_LATI), + harbours=euharbours, + rowSize=30, returnNames=TRUE) + inHarb <- tacsat$SI_HARB + inHarb <- replace(inHarb, !is.na(inHarb), 1) + inHarb <- replace(inHarb, is.na(inHarb), 0) + inHarb <- as.numeric(inHarb) + + # assign a trip identifier + tacsat$SI_FT <- 1 # init + idx <- which(inHarb==0) + tacsat[idx,"SI_FT"] <- cumsum(inHarb) [idx] # add a SI_FT index + + # keep 'out of harbour' points only + # (but keep the departure point and the arrival point lying in the harbour) + startTrip <- c(diff(tacsat[,"SI_FT"]), 0) + endTrip <- c(0, diff(tacsat[,"SI_FT"])) + tacsat[which(startTrip>0),"SI_FT"] <- tacsat[which(startTrip>0)+1,"SI_FT"] + tacsat[which(endTrip<0),"SI_FT"] <- tacsat[which(endTrip<0)-1,"SI_FT"] + tacsat <- tacsat[which(inHarb==0 | startTrip>0 | endTrip<0),] + + + # assign a state to each ping (here, useless if detectFishing at TRUE) + tacsat$SI_STATE <- 2 # init (1: fishing; 2: steaming) + # fake speed rule for fishing state + tacsat$SI_STATE [(tacsat$SI_SP>4 & tacsat$SI_SP<8)] <-1 + + + # reduce the size of the eflalo data by merging species + # (assuming that the other species is coded MZZ), 
threshold in euros. + eflalo2 <- poolEflaloSpecies (eflalo, threshold=1e6, code="MZZ") + + # debug if eflalo has not been cleaned earlier + eflalo <- eflalo[!eflalo$VE_REF=="NA" &!is.na(eflalo$VE_REF),] + + # an informed VE_FLT is also required + if(all(is.na(eflalo$VE_FLT))) eflalo$VE_FLT <- "fleet1" + + # possible mis-naming mistakes + if(!match('LE_MET_level6',colnames(eflalo))>0){ + eflalo$LE_MET_level6 <- eflalo$LE_MET + } + + # debug + eflalo <- eflalo[eflalo$LE_MET!="No_logbook6",] + + + # TEST FOR A GIVEN SET OF VESSELS + # (if detect.fishing is true then do also detection of fishing activity + # e.g. if speed='segment' the segmentTacsatSpeed() automatic detection of fishing states + # that will overwrite the existing SI_STATE) + mergeEflalo2Pings (eflalo=eflalo, tacsat=tacsat, vessels=c("738", "804"), + general=list(output.path=file.path("C:","output"), + visual.check=TRUE, detectFishing=TRUE, speed="segment", + what.speed="calculated")) + # ...OR APPLY FOR ALL VESSELS IN eflalo + mergeEflalo2Pings (eflalo=eflalo, tacsat=tacsat, + general=list(output.path=file.path("C:","output"), + visual.check=TRUE, detectFishing=TRUE, speed="segment", + what.speed="calculated")) + gc(reset=TRUE) + + # load the merged output table for one vessel + load(file.path("C:","output","merged_804_1800.RData")) + + # check the conservation of landings + sum(tapply(anf(merged$LE_KG_PLE), merged$flag, sum, na.rm=TRUE)) + sum(eflalo[eflalo$VE_REF=="804","LE_KG_PLE"], na.rm=TRUE) + + + # ...or bind all vessels (keeping only some given species here) + bindAllMergedTables (vessels=c("738", "804"), a.year = "1800", + species.to.keep=c("PLE","COD"), + folder = file.path("C:","output"), + all.in.one.table=TRUE) + + # ...and load the merged output table for all vessels + load(file.path("C:","output","all_merged__1800.RData")) + + # map landing of cod from all studied vessels + # ( with debugging if tacsat has not been cleaned earlier) + graphics.off() + df1<- all.merged[, 
c("SI_LATI","SI_LONG","LE_KG_COD")] + df1$SI_LONG <- anf(df1$SI_LONG) + df1$SI_LATI <- anf(df1$SI_LATI) + df1 <- df1[ !is.na(df1$SI_LATI),] + df1 <- df1[ !is.na(df1$SI_LONG),] + vmsGridCreate(df1,nameLon="SI_LONG", nameLat="SI_LATI", + nameVarToSum = "LE_KG_COD", cellsizeX =0.1, + cellsizeY =0.05, legendtitle = "COD landings (kg)") + + # but you need to remove steaming points before gridding! + df2<-df1[-which(is.na(df1$LE_KG_COD)),] + vmsGridCreate(df2,nameLon="SI_LONG",nameLat="SI_LATI", we = 3, ea = 6, so = 50, no = 54, + nameVarToSum = "LE_KG_COD",cellsizeX =0.1, + cellsizeY =0.05, legendtitle = "COD landings (kg)", plotPoints =TRUE, + breaks0=c(1,2,4,8,16,32,64,100000)) + + + + # CONVERT TO FISHFRAME FORMAT (might take some time running) + # (by default, this will keep all the species in the output table) + tmp <- bindAllMergedTables (vessels= unique(tacsat$VE_REF), + species.to.keep=character(), + folder = file.path("C:","output"), + all.in.one.table=FALSE) + + ff <- pings2Fishframe (general=list(output.path=file.path("C:","output"), + a.year=1800, a.country="NLD", degree=0.05 ) ) + + + # TO DO.... + # Use the interpolation routine to improve the location of the effort + #all.merged$SI_SP <- as.numeric(as.character( all.merged$SI_SP)) + #all.merged$SI_HE <- as.numeric(as.character( all.merged$SI_HE)) + #all.merged$SI_LONG <-as.numeric(as.character(all.merged$SI_LONG)) + #all.merged$SI_LATI <-as.numeric(as.character(all.merged$SI_LATI)) + #interpolations <- interpolateTacsat( all.merged [,c("VE_REF","SI_LATI","SI_LONG","SI_DATE","SI_TIME","SI_SP","SI_HE")] + # ,interval=120 + # ,margin=12 + # ,res=100 + # ,method="cHs" + # ,params=list(fm=0.5,distscale=20,sigline=0.2,st=c(2,6)) + # ,headingAdjustment=0 + # ) + #interpolationsED <- equalDistance(interpolations,res=10) + # make sure that the 'res' statement in the interpolateTacsat is significantly bigger + # than the 'res' statement in the equalDistance function. + + # then map again... 
+ #vmsGridCreate(interpolationsED,nameLon="SI_LONG",nameLat="SI_LATI", + # cellsizeX =0.05, cellsizeY =0.05, legendtitle = "landings (kg)") + + + + #} + +} # end main diff --git a/vmstools/R/nestedGrid.r b/vmstools/R/nestedGrid.r index 0c670b0..326c10a 100644 --- a/vmstools/R/nestedGrid.r +++ b/vmstools/R/nestedGrid.r @@ -1,85 +1,184 @@ -nestedGrid <- function(tacsat,resx,resy,maxstep = 10, n = 20,control=list(clm=NULL,FUN=NULL)){ - lon <- tacsat$SI_LONG; lat <- tacsat$SI_LATI - poly_wkt <- rep(NA,length(lon)) - idx <- 1:length(lon) - i <- 1 - while(i <= maxstep & length(idx) > 0){ - x <- 2^floor((i-1)/2) - y <- 2^floor((i)/2) - poly <- polyDef(lon[idx], lat[idx], resx/x, resy/y) - count <- table(poly) - - idxin <- which(poly %in% names(count[which(count >= n)])) - idxout <- which(poly %in% names(count[which(count < n)])) - poly_wkt[idx[idxout]] <- poly[idxout] - - #- Final rounds - if(i == maxstep){ - idxout <- 1:length(poly) - poly_wkt[idx[idxout]] <- poly[idxout] - } - - idx <- idx[idxin] - i <- i + 1 - } - - #- Create SpatialPolygons - uniquePols <- poly_wkt[!duplicated(poly_wkt)] - Pol <- lapply(as.list(1:length(uniquePols)), - function(i){eval(parse(text=uniquePols[i]))}) - Pols <- lapply(as.list(1:length(uniquePols)),function(x){Polygons(list(Pol[[x]]),ID=x)}) - SP <- SpatialPolygons(Pols) - SPDF <- as(SP,"SpatialPolygonsDataFrame") - - #- Call the column to aggregate over - if(is.null(control$clm)==TRUE){ - tacsat$count <- rep(1,nrow(tacsat)) - clm <- "count" - } else { clm <- control$clm } - #- Do the aggregation - tacsat$pol <- poly_wkt - if(is.null(control$FUN)==TRUE){ - agg <- unlist(lapply(as.list(unique(tacsat$pol)),function(x){apply(t(tacsat[[clm]][which(tacsat$pol==x)]),1,sum,na.rm=TRUE)})) - } else { - agg <- unlist(lapply(as.list(unique(tacsat$pol)),function(x){apply(t(tacsat[[clm]][which(tacsat$pol==x)]),1,control$FUN,na.rm=TRUE)})) - } - SPDF@data <- data.frame(agg) - colnames(SPDF@data) <- clm - - return(SPDF) -} - -round2 <- function(x, n){ - 
#' Define a nested grid
#'
#' Assign VMS points to a nested grid so that areas with a high density of
#' points get small grid cells and areas with a low density get larger cells.
#'
#' The algorithm applies a coarse starting grid to the positional data and
#' counts the points in each cell; any cell holding \code{>= n} points is
#' split in two (alternating the x and y direction), and the count/split step
#' is repeated up to \code{maxstep} times.
#'
#' @param tacsat Tacsat dataframe (needs \code{SI_LONG} and \code{SI_LATI}).
#' @param resx Gridcell size in degrees in the longitude / x direction.
#' @param resy Gridcell size in degrees in the latitude / y direction.
#' @param maxstep Maximum number of times a grid cell may be split.
#' @param n A cell is split while it contains \code{>= n} points.
#' @param control List with optional components \code{clm} (name of the tacsat
#'   column to aggregate; default counts points) and \code{FUN} (aggregation
#'   function, default \code{sum}); \code{FUN} is called with \code{na.rm=TRUE}.
#' @return A \code{SpatialPolygonsDataFrame}; the single column of its
#'   \code{data} slot is the aggregate per grid cell.
#' @seealso \code{\link{polyDef}}
#' @export nestedGrid
nestedGrid <- function(tacsat, resx, resy, maxstep = 10, n = 20,
                       control = list(clm = NULL, FUN = NULL)) {
  lon      <- tacsat$SI_LONG
  lat      <- tacsat$SI_LATI
  poly_wkt <- rep(NA, length(lon))
  idx      <- seq_along(lon)            # points still awaiting a final cell
  i        <- 1
  # NOTE: '&&' (scalar, short-circuit) is correct here; original used '&'.
  while (i <= maxstep && length(idx) > 0) {
    # split alternately in the x and y direction: 1,1 -> 1,2 -> 2,2 -> 2,4 ...
    x     <- 2^floor((i - 1) / 2)
    y     <- 2^floor(i / 2)
    poly  <- polyDef(lon[idx], lat[idx], resx / x, resy / y)
    count <- table(poly)

    idxin  <- which(poly %in% names(count[count >= n]))  # dense: split again
    idxout <- which(poly %in% names(count[count < n]))   # sparse: finalize
    poly_wkt[idx[idxout]] <- poly[idxout]

    #- Final round: keep every remaining cell regardless of its count
    if (i == maxstep) {
      idxout <- seq_along(poly)
      poly_wkt[idx[idxout]] <- poly[idxout]
    }

    idx <- idx[idxin]
    i   <- i + 1
  }

  #- Create SpatialPolygons from the unique cell definitions (order preserved)
  uniquePols <- poly_wkt[!duplicated(poly_wkt)]
  Pol  <- lapply(seq_along(uniquePols),
                 function(j) eval(parse(text = uniquePols[j])))
  Pols <- lapply(seq_along(uniquePols),
                 function(j) Polygons(list(Pol[[j]]), ID = j))
  SP   <- SpatialPolygons(Pols)
  SPDF <- as(SP, "SpatialPolygonsDataFrame")

  #- Column to aggregate over; default is a plain point count
  if (is.null(control$clm)) {
    tacsat$count <- rep(1, nrow(tacsat))
    clm <- "count"
  } else {
    clm <- control$clm
  }

  #- Aggregate per cell, in the same order as uniquePols above.
  # (Replaces the original apply(t(x), 1, FUN, na.rm=TRUE), which is just
  # FUN(x, na.rm=TRUE) on a vector.)
  tacsat$pol <- poly_wkt
  aggFUN <- if (is.null(control$FUN)) sum else control$FUN
  agg <- unlist(lapply(as.list(unique(tacsat$pol)),
                       function(p) aggFUN(tacsat[[clm]][tacsat$pol == p],
                                          na.rm = TRUE)))
  SPDF@data <- data.frame(agg)
  colnames(SPDF@data) <- clm

  return(SPDF)
}

#' Round half away from zero
#'
#' Unlike base \code{round()} (round-half-to-even), this always rounds a half
#' away from zero, so cell mid-points are assigned deterministically.
#'
#' @param x Numeric vector.
#' @param n Number of decimal places.
#' @return Numeric vector of \code{x} rounded to \code{n} decimals.
round2 <- function(x, n) {
  posneg <- sign(x)
  z <- trunc(abs(x) * 10^n + 0.5)  # +0.5 then trunc == round half up on |x|
  z <- z / 10^n
  z * posneg
}

#' Define polygon in text string format
#'
#' Snap each (lon, lat) point to the centre of its grid cell (grid origin at
#' (0,0)) and return, per point, an evaluable R expression describing the
#' cell's corner polygon.
#'
#' @param lon Vector with longitude (or x) values.
#' @param lat Vector with latitude (or y) values, same length as \code{lon}.
#' @param gridx Gridcell size in degrees in the longitude / x direction.
#' @param gridy Gridcell size in degrees in the latitude / y direction.
#' @return Character vector, one polygon-definition string per input point.
#' @note Points exactly on a cell border are assigned to the cell away from
#'   the origin (a consequence of round-half-away-from-zero in round2()).
#' @seealso \code{\link{nestedGrid}}
#' @export polyDef
polyDef <- function(lon, lat, gridx, gridy) {
  # round to the nearest rectangle mid-point
  lon1 <- round2((lon - gridx / 2) / gridx, 0) * gridx + gridx / 2
  lat1 <- round2((lat - gridy / 2) / gridy, 0) * gridy + gridy / 2

  # create the WKT-like string from the four cell corners (closed ring)
  paste0("Polygon(cbind(c(",
         lon1 - gridx / 2, ",",
         lon1 + gridx / 2, ",",
         lon1 + gridx / 2, ",",
         lon1 - gridx / 2, ",",
         lon1 - gridx / 2,
         "),c(",
         lat1 - gridy / 2, ",",
         lat1 - gridy / 2, ",",
         lat1 + gridy / 2, ",",
         lat1 + gridy / 2, ",",
         lat1 - gridy / 2,
         ")))")
}
#' Compute fraction of Confidence Interval located outside a maximum range
#'
#' The Confidence Interval surrounding an interpolation depends on the
#' 'sigline' and 'distscale' parameters, which can yield extremely wide or
#' extremely narrow CIs. This function checks which proportion of the CI mass
#' falls inside versus outside the maximum feasible range (an ellipse defined
#' by the elapsed time and vessel speed), plus the maximum CI value inside it.
#'
#' @param int Interpolation, as data.frame from 'interpolateTacsat'.
#' @param tacint Tacsat records (two rows) corresponding with the interpolation.
#' @param params List of parameters used to perform the interpolation.
#' @param grid Object of class 'GridTopology' specifying the grid dimensions.
#' @return A list with three elements, in this order: 1) fraction of CI mass
#'   inside the maximum range (ideally ~1), 2) fraction outside (ideally 0),
#'   3) maximum CI value inside the range (ideally ~1 at the VMS points).
#' @seealso \code{\link{createGrid}}, \code{\link{calculateCI}},
#'   \code{\link{point.in.polygon}}
#' @export outsideMaxRange
outsideMaxRange <- function(int
                            ,tacint
                            ,params
                            ,grid){
  # Rebuild SI_DATIM from the date/time columns when absent.
  # BUGFIX: the original assigned to 'SI_DATIMIM' (typo), so SI_DATIM stayed
  # undefined and the difftime() calls below operated on NULL.
  if (!"SI_DATIM" %in% colnames(tacint))
    tacint$SI_DATIM <- as.POSIXct(paste(tacint$SI_DATE, tacint$SI_TIME,
                                        sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M")

  CI <- calculateCI(int,
                    tacint,
                    params,
                    grid,
                    plot = FALSE)

  # Maximum-range ellipse between the interpolation's start and end points;
  # speed is at least the straight-interpolation average speed (knots).
  mxr <- maxRangeCI(x = c(int[-1, 1][1], rev(int[-1, 1])[1]),
                    y = c(int[-1, 2][1], rev(int[-1, 2])[1]),
                    Time = c(difftime(tacint$SI_DATIM[2], tacint$SI_DATIM[1], units = "mins")),
                    speed = pmax(tacint$SI_SP, rep(distanceInterpolation(list(int)) / 1.852 /
                      c(difftime(tacint$SI_DATIM[2], tacint$SI_DATIM[1], units = "hours")), 2)))

  coords <- coordinates(CI)
  propCI <- point.in.polygon(coords[, 1], coords[, 2], mxr[[1]][, 1], mxr[[1]][, 2])
  # Sum of CI values inside the maximum range, ideally all cells with values
  insideR <- sum(CI@data$data[which(propCI == 1)], na.rm = TRUE) / sum(CI@data$data, na.rm = TRUE)
  # Sum of CI values outside the maximum range, ideally 0
  outsideR <- sum(CI@data$data[which(propCI == 0)], na.rm = TRUE) / sum(CI@data$data, na.rm = TRUE)
  # Top of the CI inside the range, ideally equal to 1
  maxR <- max(CI@data$data[which(propCI == 1)], na.rm = TRUE)
  return(list(insideR, outsideR, maxR))
}
all.merged$quarter==a.quarter & - all.merged$SI_STATE==1, c("SI_LATI","SI_LONG","LE_EFF_VMS")] - df1$SI_LATI <- anf(df1$SI_LATI ) # debug... - df1$SI_LONG <- anf(df1$SI_LONG ) # debug... - df1$LE_EFF_VMS <-anf(df1$LE_EFF_VMS) # debug... - df1 <- df1[ !is.na(df1$SI_LATI),] - df1$LE_EFF_VMS <- anf(df1$LE_EFF_VMS)/ 60 # in hours - if(nrow(df1)!=0){ - vmsGridCreate(df1, nameVarToSum="LE_EFF_VMS", numCats=10, plotPoints =FALSE, legendtitle="fishing (hours)", - colLand="darkolivegreen4", addICESgrid=TRUE, - nameLon="SI_LONG", nameLat="SI_LATI", cellsizeX =cellsizeX, cellsizeY =cellsizeY, we=we, ea=ea, no=no, so=so, - breaks0=breaks0 - , legendncol=2) - title(paste(met, "-", a.year, "-", a.quarter) ) - a.met <- gsub(">=", replacement="o",met) # debug - a.met <- gsub("<", replacement="",a.met) # debug - a.met <- gsub(">", replacement="o",a.met)# debug - dir.create(file.path(output, "jpegEffort", "quarter", a.met)) - # save - savePlot(filename = file.path(output, "jpegEffort", "quarter", a.met, - paste("map_effort_hours_merged_vessels_", a.met,"_",a.year, a.quarter,".jpeg",sep="")),type ="jpeg") - dev.off() - } - }} - - return() - } - - -# pings2EffortMaps (all.merged=all.merged, output= file.path("C:","VMSanalysis", "FemernBelt"), -# cellsizeX =0.05, cellsizeY =0.05, we=9.8, ea=12.7, no=55.2, so=54.0, -# breaks0=c(0,25, 50,100,200,400,800,1600, 3200,6400,12800, 100000)) + # Author: F.Bastardie + + +#' generate some fishing effort maps from the merged output table +#' +#' generate some effort maps (jpeg files) from the merged table per year, per +#' metier (level6), per metier-quarter all together +#' +#' using some arguments informing mapGrid(). Performing automatic creation of a +#' hierarchy of output folders. see also pings2LandingsMaps(). 
+#' +#' @param all.merged the data.frame from merged VMS with logbooks +#' @param output a path for the output folder to be created +#' @param cellsizeX additional arguments for mapGrid +#' @param cellsizeY additional arguments for mapGrid +#' @param we additional arguments for mapGrid +#' @param ea additional arguments for mapGrid +#' @param no additional arguments for mapGrid +#' @param so additional arguments for mapGrid +#' @param breaks0 additional arguments for mapGrid +#' @author Francois Bastardie +#' @examples +#' +#' +#' \dontrun{ +#' +#' years= c('1800') +#' +#' for(a.year in years){ +#' +#' graphics.off() +#' +#' # ...and load the merged output table for all vessels +#' load(file.path("C:","output",paste("all_merged__",a.year,".RData",sep=''))) +#' +#' # generate the effort maps (jpeg files) and store in a hierarchy of folders +#' pings2EffortMaps (all.merged=all.merged, output= +#' file.path("C:","output"), +#' cellsizeX =0.1, cellsizeY =0.05, we = 3, ea = 6, so = 50, no = 54, +#' breaks0=c(0,25, 50,100,200,400,800,1600, 3200,6400,12800, 100000)) +#' +#' # generate the landings maps (jpeg) and store in a hierarchy of folders +#' pings2LandingsMaps (all.merged=all.merged, sp="LE_KG_COD", +#' output= file.path("C:","output"), +#' cellsizeX =0.1, cellsizeY =0.05, we = 3, ea = 6, so = 50, no = 54, +#' breaks0= c(0,100, 100*(2^1),100*(2^2),100*(2^3),100*(2^4), +#' 100*(2^5),100*(2^6), 100*(2^7),100*(2^8),100*(2^9), 10000000)) +#' +#' } +#' +#' # create some animations from the sequential jpeg files... 
+#' # (need the 'animation' R package and ImageMagik installed) +#' landingsMaps2GIFanim(idir = file.path("C:","output"), spp=c("COD") ) +#' +#' } +#' +#' +#' @export pings2EffortMaps + pings2EffortMaps <- + function (all.merged, output= file.path("C:","VMSanalysis", "FemernBelt"), + cellsizeX =0.05, cellsizeY =0.05, we=9.8, ea=12.7, no=55.2, so=54.0, + breaks0=c(0,25, 50,100,200,400,800,1600, 3200,6400,12800, 100000)){ + + # add a quarter + cat(paste("add a quarter...can be deadly long!", "\n")) + if(!"quarter" %in% colnames(all.merged)) + all.merged$quarter <- factor(substr(quarters(as.POSIXct(all.merged$SI_DATE)),2,2)) + + # create the general folder + dir.create(file.path(output), recursive = TRUE ) + dir.create(file.path(output, "jpegEffort")) + + # detect the year + a.year <- format(strptime( paste(all.merged$SI_DATE[1]) , tz='GMT', "%e/%m/%Y" ), "%Y") + + + # in overall + cat(paste("overall...", "\n")) + df1 <- all.merged[all.merged$SI_STATE==1, c("SI_LATI","SI_LONG","LE_EFF_VMS")] + df1$SI_LATI <- anf(df1$SI_LATI ) # debug... + df1$SI_LONG <- anf(df1$SI_LONG ) # debug... + df1$LE_EFF_VMS <-anf(df1$LE_EFF_VMS) # debug... 
+ df1 <- df1[ !is.na(df1$SI_LATI),] + df1$LE_EFF_VMS <- anf(df1$LE_EFF_VMS)/ 60 # in hours + vmsGridCreate(df1, nameVarToSum="LE_EFF_VMS", numCats=10, plotPoints =FALSE, legendtitle="fishing (hours)", + colLand="darkolivegreen4", addICESgrid=TRUE, + nameLon="SI_LONG", nameLat="SI_LATI", cellsizeX =cellsizeX, cellsizeY =cellsizeY, we=we, ea=ea, no=no, so=so, + breaks0=breaks0 + , legendncol=2) + dir.create(file.path(output, "jpegEffort", "overall")) + savePlot(filename = file.path("C:","VMSanalysis", "FemernBelt", "jpegEffort", "overall", + paste("map_effort_hours_merged_vessels_",a.year,".jpeg",sep="")),type ="jpeg") + dev.off() + + + # per quarter + cat(paste("per quarter...", "\n")) + for (a.quarter in levels(all.merged$quarter) ){ + df1 <- all.merged[all.merged$quarter==a.quarter & + all.merged$SI_STATE==1, c("SI_LATI","SI_LONG","LE_EFF_VMS")] + df1$SI_LATI <- anf(df1$SI_LATI ) # debug... + df1$SI_LONG <- anf(df1$SI_LONG ) # debug... + df1$LE_EFF_VMS <-anf(df1$LE_EFF_VMS) # debug... + df1 <- df1[ !is.na(df1$SI_LATI),] + df1$LE_EFF_VMS <- anf(df1$LE_EFF_VMS)/ 60 # in hours + if(nrow(df1)!=0){ + vmsGridCreate(df1, nameVarToSum="LE_EFF_VMS", numCats=10, plotPoints =FALSE, legendtitle="fishing (hours)", + colLand="darkolivegreen4", addICESgrid=TRUE, + nameLon="SI_LONG", nameLat="SI_LATI", cellsizeX =cellsizeX, cellsizeY =cellsizeY, we=we, ea=ea, no=no, so=so, + breaks0=breaks0 + , legendncol=2) + title(paste(a.year, "-", a.quarter) ) + # create folder and save + dir.create(file.path(output, "jpegEffort", "quarter")) + savePlot(filename = file.path(output, "jpegEffort", "quarter", + paste("map_effort_hours_merged_vessels_",a.year, a.quarter,".jpeg",sep="")),type ="jpeg") + dev.off() + } + } + + + # per metier + cat(paste("per metier...", "\n")) + for (met in levels(all.merged$LE_MET_level6) ){ + df1 <- all.merged[all.merged$LE_MET_level6==met & + all.merged$SI_STATE==1, c("SI_LATI","SI_LONG","LE_EFF_VMS")] + df1$SI_LATI <- anf(df1$SI_LATI ) # debug... 
+ df1$SI_LONG <- anf(df1$SI_LONG ) # debug... + df1$LE_EFF_VMS <-anf(df1$LE_EFF_VMS) # debug... + df1 <- df1[ !is.na(df1$SI_LATI),] + df1$LE_EFF_VMS <- anf(df1$LE_EFF_VMS)/ 60 # in hours + if(nrow(df1)!=0){ + vmsGridCreate(df1, nameVarToSum="LE_EFF_VMS", numCats=10, plotPoints =FALSE, legendtitle="fishing (hours)", + colLand="darkolivegreen4", addICESgrid=TRUE, + nameLon="SI_LONG", nameLat="SI_LATI", cellsizeX =cellsizeX, cellsizeY =cellsizeY, we=we, ea=ea, no=no, so=so, + breaks0=breaks0 + , legendncol=2) + title(paste(met) ) + a.met <- gsub(">=", replacement="o",met) # debug + a.met <- gsub("<", replacement="",a.met) # debug + a.met <- gsub(">", replacement="o",a.met) # debug + # create folders and save + dir.create(file.path(output, "jpegEffort", "overall", a.met)) + savePlot(filename = file.path(output, "jpegEffort","overall", a.met, + paste("map_effort_hours_merged_vessels_", a.met,"_",a.year,".jpeg",sep="")),type ="jpeg") + dev.off() + } + } + + # per metier, quarter + cat(paste("per metier, per quarter...", "\n")) + for (met in levels(all.merged$LE_MET_level6) ){ + for (a.quarter in levels(all.merged$quarter) ){ + + df1 <- all.merged[all.merged$LE_MET_level6==met & + all.merged$quarter==a.quarter & + all.merged$SI_STATE==1, c("SI_LATI","SI_LONG","LE_EFF_VMS")] + df1$SI_LATI <- anf(df1$SI_LATI ) # debug... + df1$SI_LONG <- anf(df1$SI_LONG ) # debug... + df1$LE_EFF_VMS <-anf(df1$LE_EFF_VMS) # debug... 
+ df1 <- df1[ !is.na(df1$SI_LATI),] + df1$LE_EFF_VMS <- anf(df1$LE_EFF_VMS)/ 60 # in hours + if(nrow(df1)!=0){ + vmsGridCreate(df1, nameVarToSum="LE_EFF_VMS", numCats=10, plotPoints =FALSE, legendtitle="fishing (hours)", + colLand="darkolivegreen4", addICESgrid=TRUE, + nameLon="SI_LONG", nameLat="SI_LATI", cellsizeX =cellsizeX, cellsizeY =cellsizeY, we=we, ea=ea, no=no, so=so, + breaks0=breaks0 + , legendncol=2) + title(paste(met, "-", a.year, "-", a.quarter) ) + a.met <- gsub(">=", replacement="o",met) # debug + a.met <- gsub("<", replacement="",a.met) # debug + a.met <- gsub(">", replacement="o",a.met)# debug + dir.create(file.path(output, "jpegEffort", "quarter", a.met)) + # save + savePlot(filename = file.path(output, "jpegEffort", "quarter", a.met, + paste("map_effort_hours_merged_vessels_", a.met,"_",a.year, a.quarter,".jpeg",sep="")),type ="jpeg") + dev.off() + } + }} + + return() + } + + +# pings2EffortMaps (all.merged=all.merged, output= file.path("C:","VMSanalysis", "FemernBelt"), +# cellsizeX =0.05, cellsizeY =0.05, we=9.8, ea=12.7, no=55.2, so=54.0, +# breaks0=c(0,25, 50,100,200,400,800,1600, 3200,6400,12800, 100000)) diff --git a/vmstools/R/pings2Fishframe.r b/vmstools/R/pings2Fishframe.r index 697119f..2f5b22c 100644 --- a/vmstools/R/pings2Fishframe.r +++ b/vmstools/R/pings2Fishframe.r @@ -1,254 +1,290 @@ - - - pings2Fishframe <- function(general=list(output.path= - file.path("H:","DIFRES","VMSanalysis","results_merged","DKWaters"), - a.year=2009, a.country="DNK", degree=0.05)){ - - - # TO FISHFRAME (author: F. Bastardie) - # create the VE table to upload in fishframe - # required: the data.table package - # optional: the "areas" shape file for ICESarea()(if not provided as arg then loaded from the vmstools library) - mergedTable2FishframeVE <- function (general=list(output.path=file.path("C:","output"), - a.year=2009, a.country="DNK", degree=0.05),...){ - lstargs <- list(...) 
#' convert the merged (VMS + logbooks) data.frame to comply with the fishframe
#' format
#'
#' the merged data.frame coming from mergeEflalo2Pings() can be further
#' converted to be uploaded in the fishframe data warehouse. This includes the
#' aggregation of fishing pings (VMS effort and logbook landings) by CSquare
#' area code and month.
#'
#' The function first starts by searching the input data.frame in the
#' output.path folder. The result will be saved in the output.path folder as
#' well. The 'data.table' package is required to perform the aggregation of the
#' data.frame much faster. An area code is also added (e.g. from the ICESarea()
#' function). This latter step is costly in terms of computation time due to
#' the spatial polygon inclusion test performed for a lot of VMS positions
#' (across a lot of polygons!) and this explains the current poor time
#' performance of the whole function.
#'
#' @param general a list of general settings: \code{output.path} (folder with
#'   the merged .RData inputs, also receiving the outputs), \code{a.year} (a
#'   year to be inserted in the csv), \code{a.country} (a country code to be
#'   inserted in the csv), and \code{degree} (XY grid resolution in degree)
#' @return the bound VE/VSL data.frame, invisibly also written to a csv file
#'   in \code{general$output.path}
#' @author Francois Bastardie
#' @examples
#'
#' \dontrun{
#' data(tacsat)
#' dir.create("C:/output/")
#' # CONVERT TO FISHFRAME FORMAT (might take some time running)
#' # (by default, this will keep all the species in the output table)
#' tmp <- bindAllMergedTables (vessels= unique(tacsat$VE_REF),
#'            species.to.keep=character(),a.year="1800",
#'            folder = file.path("C:","output"), all.in.one.table=FALSE)
#'
#' ff <- pings2Fishframe (general=list(output.path=file.path("C:","output"),
#'                          a.year=1800, a.country="Atlantis") )
#'
#' }
#' @export pings2Fishframe
pings2Fishframe <- function(general = list(output.path =
                              file.path("H:", "DIFRES", "VMSanalysis", "results_merged", "DKWaters"),
                              a.year = 2009, a.country = "DNK", degree = 0.05)) {


  # TO FISHFRAME (author: F. Bastardie)
  # create the VE (effort) table to upload in fishframe
  # required: the data.table package
  # optional: a 'spatialPolygons' argument for ICESarea() (if not provided
  # then the ICESareas shape is loaded from the vmstools library)
  mergedTable2FishframeVE <- function(general = list(output.path = file.path("C:", "output"),
                                      a.year = 2009, a.country = "DNK", degree = 0.05), ...) {
    lstargs <- list(...)

    # compute per-ping totals over all species columns:
    # 'value' from the EURO columns, 'weight' from the KG columns
    for (what in c('value', 'weight')) {
      load(file.path(general$output.path, paste("all_merged_", what, "_", general$a.year, ".RData", sep = '')))
      nm <- colnames(all.merged)
      if (what == 'value')  idx.col <- grep('EURO', nm)  # index columns with species
      if (what == 'weight') idx.col <- grep('KG', nm)    # index columns with species
      assign(what, apply(all.merged[, idx.col], 1, sum, na.rm = TRUE))
    }
    all.merged$totvalue  <- value
    all.merged$totweight <- weight

    # debug: avoid an empty metier level
    all.merged[all.merged$LE_MET_level6 == "", "LE_MET_level6"] <- "NA"

    an <- function(x) as.numeric(as.character(x))
    all.merged$SI_LONG <- an(all.merged$SI_LONG)
    all.merged$SI_LATI <- an(all.merged$SI_LATI)
    all.merged <- all.merged[, !colnames(all.merged) %in% c('fuelcons', 'flag')]
    all.merged <- all.merged[all.merged$SI_STATE == 1, ]  # keep only fishing pings
    nm <- colnames(all.merged)
    if (length(lstargs$spatialPolygons) == 0) {
      data(ICESareas)
      all.merged$ICES_area <- ICESarea(all.merged[, c('SI_LONG', 'SI_LATI')], areas = ICESareas)
    } else {
      all.merged$ICES_area <- ICESarea(all.merged[, c('SI_LONG', 'SI_LATI')], areas = lstargs$spatialPolygons)
    }
    all.merged$c_square <- CSquare(an(all.merged$SI_LONG), an(all.merged$SI_LATI), degrees = general$degree)
    all.merged$month <- factor(format(as.POSIXct(all.merged$SI_DATE), "%m"))  # add month
    # NOTE(review): divides by 24 although the original comment said
    # "convert in hours"; this looks like an hours -> days conversion.
    # Confirm the unit of LE_EFF_VMS before changing anything here.
    all.merged$LE_EFF_VMS <- an(all.merged$LE_EFF_VMS) / 24
    all.merged <- all.merged[, c("LE_EFF_VMS", "KW_HOURS", "totvalue", "totweight", "LE_MET_level6", "ICES_area", "c_square", "month")]
    all.merged$c_square  <- factor(all.merged$c_square)
    all.merged$ICES_area <- factor(all.merged$ICES_area)


    # base::aggregate() replaced by fast grouping using the data.table library
    library(data.table)
    DT <- data.table(all.merged)
    qu <- quote(list(sum(an(LE_EFF_VMS)), sum(an(KW_HOURS)), sum(an(totvalue)), sum(an(totweight))))
    ff.ve <- DT[, eval(qu), by = list(c_square, ICES_area, month, LE_MET_level6)]
    colnames(ff.ve) <- c('c_square', 'ICES_area', 'month', 'LE_MET_level6', 'hours', 'kw_hours', 'totvalue', 'totweight')

    # additional fishframe fields
    ff.ve$year        <- general$a.year
    ff.ve$country     <- general$a.country
    ff.ve$nationalFAC <- " "
    ff.ve$recordtype  <- "VE"
    ff.ve$quarter     <- ff.ve$month  # init
    levels(ff.ve$quarter) <- c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)  # month -> quarter

    # order columns
    ff.ve <- as.data.frame(ff.ve)[, c('recordtype', 'country', 'year', 'quarter', 'month',
                                      'ICES_area', 'c_square', 'nationalFAC', 'LE_MET_level6',
                                      'hours', 'kw_hours', 'totweight', 'totvalue')]
    # save
    write.table(ff.ve, file = file.path(general$output.path,
                paste("ff_ve_", general$a.year, ".txt", sep = '')), dec = ".", sep = ";", quote = FALSE, row.names = FALSE)

    return(ff.ve)
  }

  # TO FISHFRAME (author: F. Bastardie)
  # create the VSL (landings) table to upload in fishframe
  # require: the 'data.table' and 'doBy' packages
  # optional: a 'spatialPolygons' argument for ICESarea() (if not provided
  # then the ICESareas shape is loaded from the vmstools library)
  mergedTable2FishframeVSL <- function(general = list(output.path = file.path("C:", "output"),
                                       a.year = 2009, a.country = "DNK", degree = 0.05), ...) {
    lstargs <- list(...)

    an <- function(x) as.numeric(as.character(x))

    # reshape in 'long' format
    # 1. load the 'weight' table
    what <- "weight"
    load(file.path(general$output.path, paste("all_merged_", what, "_", general$a.year, ".RData", sep = '')))
    all.merged[all.merged$LE_MET_level6 == "", "LE_MET_level6"] <- "NA"  # debug
    nm <- colnames(all.merged)
    idx.col <- grep('KG', nm)  # index columns with species
    all.merged$SI_LONG <- an(all.merged$SI_LONG)
    all.merged$SI_LATI <- an(all.merged$SI_LATI)
    all.merged <- all.merged[all.merged$SI_STATE == 1, ]  # keep only fishing pings
    if (length(lstargs$spatialPolygons) == 0) {
      # fixed: data(ICESareas) used to sit in the wrong branch (it was loaded
      # where lstargs$spatialPolygons is used and missing where ICESareas is
      # used); now consistent with mergedTable2FishframeVE above
      data(ICESareas)
      all.merged$ICES_area <- ICESarea(all.merged[, c('SI_LONG', 'SI_LATI')], areas = ICESareas)
    } else {
      all.merged$ICES_area <- ICESarea(all.merged[, c('SI_LONG', 'SI_LATI')], areas = lstargs$spatialPolygons)
    }
    all.merged$c_square <- factor(CSquare(an(all.merged$SI_LONG), an(all.merged$SI_LATI), degrees = general$degree))
    all.merged$month <- factor(format(as.POSIXct(all.merged$SI_DATE), "%m"))  # add month
    nm1 <- colnames(all.merged)
    idx.c <- which(nm1 %in% c('VE_REF', 'FT_REF', "LE_MET_level6", "ICES_area", "c_square", "month"))
    xx1 <- all.merged[, c(idx.c, idx.col)]
    colnames(xx1) <- c('VE_REF', 'FT_REF', "LE_MET_level6", "ICES_area", "c_square", "month", paste("sp", 1:length(idx.col), sep = ''))

    # ...then the 'value' table, processed the same way
    what <- "value"
    load(file.path(general$output.path, paste("all_merged_", what, "_", general$a.year, ".RData", sep = '')))
    all.merged[all.merged$LE_MET_level6 == "", "LE_MET_level6"] <- "NA"  # debug
    nm <- colnames(all.merged)
    idx.col <- grep('EURO', nm)  # index columns with species
    all.merged$SI_LONG <- an(all.merged$SI_LONG)
    all.merged$SI_LATI <- an(all.merged$SI_LATI)
    all.merged <- all.merged[all.merged$SI_STATE == 1, ]  # keep only fishing pings
    if (length(lstargs$spatialPolygons) == 0) {
      data(ICESareas)
      all.merged$ICES_area <- ICESarea(all.merged[, c('SI_LONG', 'SI_LATI')], areas = ICESareas)
    } else {
      all.merged$ICES_area <- ICESarea(all.merged[, c('SI_LONG', 'SI_LATI')], areas = lstargs$spatialPolygons)
    }
    all.merged$c_square <- factor(CSquare(an(all.merged$SI_LONG), an(all.merged$SI_LATI), degrees = general$degree))
    all.merged$month <- factor(format(as.POSIXct(all.merged$SI_DATE), "%m"))  # add month
    nm2 <- colnames(all.merged)
    idx.c <- which(nm2 %in% c('VE_REF', 'FT_REF', "LE_MET_level6", "ICES_area", "c_square", "month"))
    xx2 <- all.merged[, c(idx.c, idx.col)]
    colnames(xx2) <- c('VE_REF', 'FT_REF', "LE_MET_level6", "ICES_area", "c_square", "month", paste("sp", 1:length(idx.col), sep = ''))

    # 2. order before splitting in sub-blocks because aggregate() afterwards
    library(doBy)
    xx1 <- orderBy(~c_square + LE_MET_level6 + month, data = xx1)
    xx2 <- orderBy(~c_square + LE_MET_level6 + month, data = xx2)

    # 3. reshape => 'wide' to 'long' format
    # (tricky: done sub-block by sub-block because of potential 'out of memory')
    res <- NULL
    lev <- as.character(levels(xx1$c_square))  # do not split randomly but consistently with levels
    chunk <- c(seq(1, length(lev), by = 2000), length(lev))  # 2000 by 2000 levels...
    for (i in 1:(length(chunk) - 1)) {
      rm(vsl.ff1, vsl.ff2, vsl.ff); gc(reset = TRUE)
      cat(paste("level c_square", chunk[i], "to", chunk[i + 1], "\n"))
      vsl.ff1 <- reshape(xx1[xx1$c_square %in% lev[chunk[i]:chunk[i + 1]], ],
                         direction = "long", varying = 7:(6 + length(idx.col)), sep = "")  # 'long' format
      colnames(vsl.ff1) <- c('VE_REF', 'FT_REF', "LE_MET_level6", "ICES_area", "c_square", "month", "species", "weight", "id")
      vsl.ff1$species <- factor(vsl.ff1$species)
      # extract the FAO species code from column names of the form XX_YY_SPP
      get.sp <- function(nm) unlist(lapply(strsplit(nm, split = "_"), function(x) x[3]))
      levels(vsl.ff1$species) <- get.sp(nm1[idx.col])

      vsl.ff2 <- reshape(xx2[xx2$c_square %in% lev[chunk[i]:chunk[i + 1]], ],
                         direction = "long", varying = 7:(6 + length(idx.col)), sep = "")  # 'long' format
      colnames(vsl.ff2) <- c('VE_REF', 'FT_REF', "LE_MET_level6", "ICES_area", "c_square", "month", "species", "value", "id")
      vsl.ff2$species <- factor(vsl.ff2$species)
      nm <- colnames(xx2)
      get.sp <- function(nm) unlist(lapply(strsplit(nm, split = "_"), function(x) x[3]))
      levels(vsl.ff2$species) <- get.sp(nm2[idx.col])

      # 4. cbind weight and value side by side
      vsl.ff <- cbind.data.frame(vsl.ff1, vsl.ff2$value)

      # 5. clean up: drop empty-weight records and the reshape 'id' column
      vsl.ff <- vsl.ff[!is.na(vsl.ff$weight) & vsl.ff$weight != 0, ]
      vsl.ff <- vsl.ff[, !colnames(vsl.ff) %in% c('id')]
      colnames(vsl.ff) <- c('VE_REF', 'FT_REF', "LE_MET_level6", "ICES_area", "c_square", "month", "species", "weight", "value")

      # 6. aggregate with fast grouping (caution: > R.2.11.0)
      library(data.table)
      vsl.ff$ICES_area <- factor(vsl.ff$ICES_area)
      DT <- data.table(vsl.ff)
      qu <- quote(list(sum(an(weight)), sum(an(value))))
      vsl.ff <- DT[, eval(qu), by = list(species, ICES_area, c_square, month, LE_MET_level6)]
      colnames(vsl.ff) <- c('species', 'ICES_area', 'c_square', 'month', 'LE_MET_level6', 'weight', 'value')

      # 7. bind all chunks
      res <- rbind.data.frame(res, vsl.ff)
    }

    # 8. additional fishframe fields
    res$year        <- general$a.year
    res$country     <- general$a.country
    res$nationalFAC <- " "
    res$recordtype  <- "VSL"
    res$quarter     <- res$month  # init
    levels(res$quarter) <- c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)  # month -> quarter

    # 9. convert species fao code to fishframe latin species names
    data(speciesLatinNames)
    res$species <- speciesLatinNames$ff_species_latin[match(as.character(res$species),
                     as.character(speciesLatinNames$fao_code))]


    # 10. order columns
    ff.vsl <- res
    ff.vsl <- as.data.frame(ff.vsl)[, c('recordtype', 'country', 'year', 'quarter', 'month',
                                        'ICES_area', 'c_square', 'nationalFAC', 'LE_MET_level6',
                                        'species', 'weight', 'value')]
    # 11. save
    write.table(ff.vsl, file = file.path(general$output.path,
                paste("ff_vsl_", general$a.year, ".txt", sep = '')), dec = ".", sep = ";", quote = FALSE, row.names = FALSE)


    return(ff.vsl)
  }




  # GENERAL CALLS
  ve  <- mergedTable2FishframeVE(general = general)
  vsl <- mergedTable2FishframeVSL(general = general)


  # add a fake column to get the same ncol()
  vsl <- cbind(vsl, 0)
  colnames(ve)  <- paste('col', 1:ncol(ve), sep = '')
  colnames(vsl) <- paste('col', 1:ncol(vsl), sep = '')

  # bind and order
  # (to get a VE line and then VSL lines, VE and then VSL lines, etc.)
  ff <- rbind(ve, vsl)
  library(doBy)
  ff <- orderBy(~col7 + col9 + col5 + col6 + col1, data = ff)

  # round the numbers (effort/weight/value columns)
  ff[, c(11, 12, 13)] <- ceiling(ff[, c(11, 12, 13)])

  # remove record with 0 in value because both
  # weight and value are mandatory for uploading in fishframe
  # but this should not have any effect here because
  # filling the gap should have been performed before on eflalo...
  ff <- ff[ff[, c(12)] != 0, ]

  # save
  write.table(ff, file = file.path(general$output.path,
              paste(general$a.country, "_", general$a.year, "VD.csv", sep = '')), dec = ".", sep = ";",
              quote = FALSE, row.names = FALSE, col.names = FALSE)


  return(ff)
}


# example calls
# vsl <- mergedTable2FishframeVSL (general=list(output.path=file.path("C:","merging", "EflaloAndTacsat"),
#                                  a.year=2009, a.country="DNK", degree=0.05) )
# ve <- mergedTable2FishframeVE (general=list(output.path=file.path("C:","merging", "EflaloAndTacsat"),
#                                a.year=2009, a.country="DNK", degree=0.05) )

# alternatively:
# for (a_year in as.character(2005:2010))
#   ff <- pings2Fishframe (general=list(output.path=file.path("C:","merging", "EflaloAndTacsat"),
#                          a.year=a_year, a.country="DNK", degree=0.01) )
pings2LandingsMaps <- - function (all.merged, sp="LE_KG_COD", output= file.path("C:","VMSanalysis", "FemernBelt"), - cellsizeX =0.05, cellsizeY =0.05, we=9.8, ea=12.7, no=55.2, so=54.0, - breaks0= c(0,100, 100*(2^1),100*(2^2),100*(2^3),100*(2^4),100*(2^5),100*(2^6), 100*(2^7),100*(2^8),100*(2^9), 10000000)){ - - if(!"quarter" %in% colnames(all.merged)) - all.merged$quarter <- factor(substr(quarters(as.POSIXct(all.merged$SI_DATE)),2,2)) - - # create the general folder - dir.create(file.path(output), recursive = TRUE ) - dir.create(file.path(output, "jpegLandings")) - - # detect the year - a.year <- format(strptime( paste(all.merged$SI_DATE[1]) , tz='GMT', "%e/%m/%Y" ), "%Y") - - if (any(grep("EURO", sp)>0)) what <- "value" - if (any(grep("KG", sp)>0)) what <- "weight" - if(what=="weight") a.unit <- "(KG)" - if(what=="value") a.unit <- "(EURO)" - - get.sp <- function (nm) unlist(lapply(strsplit(nm, split="_"), function(x) x[3])) - a.sp <- get.sp(sp) - - df1 <- all.merged[all.merged$SI_STATE==1, c("SI_LATI","SI_LONG",sp)] - df1$SI_LATI <- anf(df1$SI_LATI ) # debug... - df1$SI_LONG <- anf(df1$SI_LONG ) # debug... 
- df1[,sp] <- replace(df1[,sp], is.na(df1[,sp]) | df1[,sp]<0, 0) - vmsGridCreate(df1, nameVarToSum=sp, numCats=10, plotPoints =FALSE, legendtitle=paste("landings",what, a.unit,sep=' '), - colLand="darkolivegreen4", addICESgrid=TRUE, - nameLon="SI_LONG", nameLat="SI_LATI", cellsizeX =cellsizeX, cellsizeY =cellsizeY, we=we, ea=ea, no=no, so=so, - breaks0=breaks0, legendncol=2) - title(a.sp) - # create folders and save - dir.create(file.path(output, "jpegLandings", a.sp)) - dir.create(file.path(output, "jpegLandings", a.sp, "overall")) - dir.create(file.path(output, "jpegLandings", a.sp, "overall",what)) - savePlot(filename = file.path(output, "jpegLandings", a.sp, "overall", what, - paste("map_landings_",what,"_merged_vessels_",a.sp,"_",a.year,".jpeg",sep="")),type ="jpeg") - - dev.off() - - - # per quarter - for (a.quarter in levels(all.merged$quarter) ){ - df1 <- all.merged[all.merged$quarter==a.quarter & - all.merged$SI_STATE==1, c("SI_LATI","SI_LONG",sp)] - df1$SI_LATI <- anf(df1$SI_LATI) # debug... - df1$SI_LONG <- anf(df1$SI_LONG) # debug... 
- df1[,sp] <- replace(df1[,sp], is.na(df1[,sp]) | df1[,sp]<0, 0) - if(nrow(df1)!=0){ - vmsGridCreate(df1, nameVarToSum=sp, numCats=10, plotPoints =FALSE, legendtitle=paste("landings",what,a.unit,sep=' '), - colLand="darkolivegreen4", addICESgrid=TRUE, - nameLon="SI_LONG", nameLat="SI_LATI", cellsizeX =cellsizeX, cellsizeY =cellsizeY, we=we, ea=ea, no=no, so=so, - breaks0=breaks0,legendncol=2) - title(paste( a.sp, "-", a.year, "-", a.quarter) ) - # create folder and save - dir.create(file.path(output, "jpegLandings", a.sp, "overall", what, "quarter")) - savePlot(filename = file.path(output, "jpegLandings", a.sp, "overall", what, "quarter", - paste("map_landings_",what,"_merged_vessels_",a.sp,"_overall_",a.year, a.quarter,".jpeg",sep="")),type ="jpeg") - dev.off() - } - } - - - # per metier - for (met in levels(all.merged$LE_MET_level6) ){ - df1 <- all.merged[all.merged$LE_MET_level6==met & - all.merged$SI_STATE==1, c("SI_LATI","SI_LONG",sp)] - df1$SI_LATI <- anf(df1$SI_LATI) # debug... - df1$SI_LONG <- anf(df1$SI_LONG) # debug... 
- df1[,sp] <- replace(df1[,sp], is.na(df1[,sp]) | df1[,sp]<0, 0) - if(nrow(df1)!=0){ - vmsGridCreate(df1, nameVarToSum=sp, numCats=10, plotPoints =FALSE, legendtitle=paste("landings",what,a.unit,sep=' '), - colLand="darkolivegreen4", addICESgrid=TRUE, - nameLon="SI_LONG", nameLat="SI_LATI",cellsizeX =cellsizeX, cellsizeY =cellsizeY, we=we, ea=ea, no=no, so=so, - breaks0=breaks0,legendncol=2) - title(paste(met, "-", a.sp) ) - a.met <- gsub(">=", replacement="o",met) # debug - a.met <- gsub("<", replacement="",a.met) # debug - a.met <- gsub(">", replacement="o",a.met) # debug - # create folders and save - dir.create(file.path(output, "jpegLandings", a.sp, a.met)) - dir.create(file.path(output, "jpegLandings", a.sp, a.met, what)) - savePlot(filename = file.path(output, "jpegLandings", a.sp, a.met, what, - paste("map_landings_",what,"_merged_vessels_",a.sp,"_", a.met,"_",a.year,".jpeg",sep="")),type ="jpeg") - dev.off() - } - } - - # per metier, quarter - for (met in levels(all.merged$LE_MET_level6) ){ - for (a.quarter in levels(all.merged$quarter) ){ - - df1 <- all.merged[all.merged$LE_MET_level6==met & - all.merged$quarter==a.quarter & - all.merged$SI_STATE==1, c("SI_LATI","SI_LONG",sp)] - df1$SI_LATI <- anf(df1$SI_LATI) # debug... - df1$SI_LONG <- anf(df1$SI_LONG) # debug... 
- df1[,sp] <- replace(df1[,sp], is.na(df1[,sp]) | df1[,sp]<0, 0) - if(nrow(df1)!=0){ - vmsGridCreate(df1, nameVarToSum=sp, numCats=10, plotPoints =FALSE, legendtitle=paste("landings",what,a.unit,sep=' '), - colLand="darkolivegreen4", addICESgrid=TRUE, - nameLon="SI_LONG", nameLat="SI_LATI", cellsizeX =cellsizeX, cellsizeY =cellsizeY, we=we, ea=ea, no=no, so=so, - breaks0=breaks0,legendncol=2) - title(paste(met, "-", a.sp, "-", a.year, "-", a.quarter) ) - a.met <- gsub(">=", replacement="o",met) # debug - a.met <- gsub("<", replacement="",a.met) # debug - a.met <- gsub(">", replacement="o",a.met)# debug - dir.create(file.path(output, "jpegLandings", a.sp, a.met, what, "quarter")) - # save - savePlot(filename = file.path(output, "jpegLandings", a.sp, a.met, what, "quarter", - paste("map_landings_",what,"_merged_vessels_",a.sp,"_", a.met,"_",a.year, a.quarter,".jpeg",sep="")),type ="jpeg") - dev.off() - } - }} - - return() - } - # in value - # pings2LandingsMaps (all.merged=all.merged, sp="LE_EURO_COD", output= file.path("C:","VMSanalysis", "FemernBelt"), - # cellsizeX =0.05, cellsizeY =0.05, we=9.8, ea=12.7, no=55.2, so=54.0, # fehmarn Belt area - # breaks0= c(0,100, 100*(2^1),100*(2^2),100*(2^3),100*(2^4),100*(2^5),100*(2^6), 100*(2^7),100*(2^8),100*(2^9), 10000000) - # ) + # Author: F.Bastardie + + +#' generate some landings maps from the merged output table +#' +#' generate landings maps from the merged table per year, per metier (DCF +#' Level6), per metier-quarter all together +#' +#' using some arguments informing mapGrid(). Performing automatic creation of a +#' hierarchy of output folders. 
+#' +#' @param all.merged the data.frame from merged VMS to logbooks +#' @param sp sp name (FAO code) +#' @param output a path for the output to be located +#' @param cellsizeX additional arguments for mapGrid +#' @param cellsizeY additional arguments for mapGrid +#' @param we additional arguments for mapGrid +#' @param ea additional arguments for mapGrid +#' @param no additional arguments for mapGrid +#' @param so additional arguments for mapGrid +#' @param breaks0 additional arguments for mapGrid +#' @author Francois Bastardie +#' @examples +#' +#' +#' \dontrun{ +#' +#' years= c('1800') +#' +#' for(a.year in years){ +#' +#' graphics.off() +#' +#' # ...and load the merged output table for all vessels +#' load(file.path("C:","output",paste("all_merged__",a.year,".RData",sep=''))) +#' +#' # generate the effort maps (jpeg files) and store in a hierarchy of folders +#' pings2EffortMaps (all.merged=all.merged, output= +#' file.path("C:","output"), +#' cellsizeX =0.1, cellsizeY =0.05, we = 3, ea = 6, so = 50, no = 54, +#' breaks0=c(0,25, 50,100,200,400,800,1600, 3200,6400,12800, 100000)) +#' +#' # generate the landings maps (jpeg) and store in a hierarchy of folders +#' pings2LandingsMaps (all.merged=all.merged, sp="LE_KG_COD", +#' output= file.path("C:","output"), +#' cellsizeX =0.1, cellsizeY =0.05, we = 3, ea = 6, so = 50, no = 54, +#' breaks0= c(0,100, 100*(2^1),100*(2^2),100*(2^3),100*(2^4), +#' 100*(2^5),100*(2^6), 100*(2^7),100*(2^8),100*(2^9), 10000000)) +#' +#' } +#' +#' # create some animations from the sequential jpeg files... 
+#' # (need the 'animation' R package and ImageMagik installed) +#' landingsMaps2GIFanim(idir = file.path("C:","output"), spp=c("COD") ) +#' +#' +#' } +#' +#' @export pings2LandingsMaps + pings2LandingsMaps <- + function (all.merged, sp="LE_KG_COD", output= file.path("C:","VMSanalysis", "FemernBelt"), + cellsizeX =0.05, cellsizeY =0.05, we=9.8, ea=12.7, no=55.2, so=54.0, + breaks0= c(0,100, 100*(2^1),100*(2^2),100*(2^3),100*(2^4),100*(2^5),100*(2^6), 100*(2^7),100*(2^8),100*(2^9), 10000000)){ + + if(!"quarter" %in% colnames(all.merged)) + all.merged$quarter <- factor(substr(quarters(as.POSIXct(all.merged$SI_DATE)),2,2)) + + # create the general folder + dir.create(file.path(output), recursive = TRUE ) + dir.create(file.path(output, "jpegLandings")) + + # detect the year + a.year <- format(strptime( paste(all.merged$SI_DATE[1]) , tz='GMT', "%e/%m/%Y" ), "%Y") + + if (any(grep("EURO", sp)>0)) what <- "value" + if (any(grep("KG", sp)>0)) what <- "weight" + if(what=="weight") a.unit <- "(KG)" + if(what=="value") a.unit <- "(EURO)" + + get.sp <- function (nm) unlist(lapply(strsplit(nm, split="_"), function(x) x[3])) + a.sp <- get.sp(sp) + + df1 <- all.merged[all.merged$SI_STATE==1, c("SI_LATI","SI_LONG",sp)] + df1$SI_LATI <- anf(df1$SI_LATI ) # debug... + df1$SI_LONG <- anf(df1$SI_LONG ) # debug... 
+ df1[,sp] <- replace(df1[,sp], is.na(df1[,sp]) | df1[,sp]<0, 0) + vmsGridCreate(df1, nameVarToSum=sp, numCats=10, plotPoints =FALSE, legendtitle=paste("landings",what, a.unit,sep=' '), + colLand="darkolivegreen4", addICESgrid=TRUE, + nameLon="SI_LONG", nameLat="SI_LATI", cellsizeX =cellsizeX, cellsizeY =cellsizeY, we=we, ea=ea, no=no, so=so, + breaks0=breaks0, legendncol=2) + title(a.sp) + # create folders and save + dir.create(file.path(output, "jpegLandings", a.sp)) + dir.create(file.path(output, "jpegLandings", a.sp, "overall")) + dir.create(file.path(output, "jpegLandings", a.sp, "overall",what)) + savePlot(filename = file.path(output, "jpegLandings", a.sp, "overall", what, + paste("map_landings_",what,"_merged_vessels_",a.sp,"_",a.year,".jpeg",sep="")),type ="jpeg") + + dev.off() + + + # per quarter + for (a.quarter in levels(all.merged$quarter) ){ + df1 <- all.merged[all.merged$quarter==a.quarter & + all.merged$SI_STATE==1, c("SI_LATI","SI_LONG",sp)] + df1$SI_LATI <- anf(df1$SI_LATI) # debug... + df1$SI_LONG <- anf(df1$SI_LONG) # debug... 
+ df1[,sp] <- replace(df1[,sp], is.na(df1[,sp]) | df1[,sp]<0, 0) + if(nrow(df1)!=0){ + vmsGridCreate(df1, nameVarToSum=sp, numCats=10, plotPoints =FALSE, legendtitle=paste("landings",what,a.unit,sep=' '), + colLand="darkolivegreen4", addICESgrid=TRUE, + nameLon="SI_LONG", nameLat="SI_LATI", cellsizeX =cellsizeX, cellsizeY =cellsizeY, we=we, ea=ea, no=no, so=so, + breaks0=breaks0,legendncol=2) + title(paste( a.sp, "-", a.year, "-", a.quarter) ) + # create folder and save + dir.create(file.path(output, "jpegLandings", a.sp, "overall", what, "quarter")) + savePlot(filename = file.path(output, "jpegLandings", a.sp, "overall", what, "quarter", + paste("map_landings_",what,"_merged_vessels_",a.sp,"_overall_",a.year, a.quarter,".jpeg",sep="")),type ="jpeg") + dev.off() + } + } + + + # per metier + for (met in levels(all.merged$LE_MET_level6) ){ + df1 <- all.merged[all.merged$LE_MET_level6==met & + all.merged$SI_STATE==1, c("SI_LATI","SI_LONG",sp)] + df1$SI_LATI <- anf(df1$SI_LATI) # debug... + df1$SI_LONG <- anf(df1$SI_LONG) # debug... 
+ df1[,sp] <- replace(df1[,sp], is.na(df1[,sp]) | df1[,sp]<0, 0) + if(nrow(df1)!=0){ + vmsGridCreate(df1, nameVarToSum=sp, numCats=10, plotPoints =FALSE, legendtitle=paste("landings",what,a.unit,sep=' '), + colLand="darkolivegreen4", addICESgrid=TRUE, + nameLon="SI_LONG", nameLat="SI_LATI",cellsizeX =cellsizeX, cellsizeY =cellsizeY, we=we, ea=ea, no=no, so=so, + breaks0=breaks0,legendncol=2) + title(paste(met, "-", a.sp) ) + a.met <- gsub(">=", replacement="o",met) # debug + a.met <- gsub("<", replacement="",a.met) # debug + a.met <- gsub(">", replacement="o",a.met) # debug + # create folders and save + dir.create(file.path(output, "jpegLandings", a.sp, a.met)) + dir.create(file.path(output, "jpegLandings", a.sp, a.met, what)) + savePlot(filename = file.path(output, "jpegLandings", a.sp, a.met, what, + paste("map_landings_",what,"_merged_vessels_",a.sp,"_", a.met,"_",a.year,".jpeg",sep="")),type ="jpeg") + dev.off() + } + } + + # per metier, quarter + for (met in levels(all.merged$LE_MET_level6) ){ + for (a.quarter in levels(all.merged$quarter) ){ + + df1 <- all.merged[all.merged$LE_MET_level6==met & + all.merged$quarter==a.quarter & + all.merged$SI_STATE==1, c("SI_LATI","SI_LONG",sp)] + df1$SI_LATI <- anf(df1$SI_LATI) # debug... + df1$SI_LONG <- anf(df1$SI_LONG) # debug... 
+ df1[,sp] <- replace(df1[,sp], is.na(df1[,sp]) | df1[,sp]<0, 0) + if(nrow(df1)!=0){ + vmsGridCreate(df1, nameVarToSum=sp, numCats=10, plotPoints =FALSE, legendtitle=paste("landings",what,a.unit,sep=' '), + colLand="darkolivegreen4", addICESgrid=TRUE, + nameLon="SI_LONG", nameLat="SI_LATI", cellsizeX =cellsizeX, cellsizeY =cellsizeY, we=we, ea=ea, no=no, so=so, + breaks0=breaks0,legendncol=2) + title(paste(met, "-", a.sp, "-", a.year, "-", a.quarter) ) + a.met <- gsub(">=", replacement="o",met) # debug + a.met <- gsub("<", replacement="",a.met) # debug + a.met <- gsub(">", replacement="o",a.met)# debug + dir.create(file.path(output, "jpegLandings", a.sp, a.met, what, "quarter")) + # save + savePlot(filename = file.path(output, "jpegLandings", a.sp, a.met, what, "quarter", + paste("map_landings_",what,"_merged_vessels_",a.sp,"_", a.met,"_",a.year, a.quarter,".jpeg",sep="")),type ="jpeg") + dev.off() + } + }} + + return() + } + # in value + # pings2LandingsMaps (all.merged=all.merged, sp="LE_EURO_COD", output= file.path("C:","VMSanalysis", "FemernBelt"), + # cellsizeX =0.05, cellsizeY =0.05, we=9.8, ea=12.7, no=55.2, so=54.0, # fehmarn Belt area + # breaks0= c(0,100, 100*(2^1),100*(2^2),100*(2^3),100*(2^4),100*(2^5),100*(2^6), 100*(2^7),100*(2^8),100*(2^9), 10000000) + # ) diff --git a/vmstools/R/plotTools.r b/vmstools/R/plotTools.r index c0ea991..9995912 100644 --- a/vmstools/R/plotTools.r +++ b/vmstools/R/plotTools.r @@ -1,116 +1,179 @@ -plotTools <- function(x,level="ICESrectangle",xlim,ylim,zlim=NULL,log=FALSE,gridcell=c(0.1,0.05),color=NULL,control.tacsat=list(clm=NULL),control.eflalo=list(clm=NULL),returnRange=FALSE,las=1){ - require(maps) - require(mapdata) - if(is.null(color)==TRUE) color <- rev(heat.colors(9)) - - #TACSAT - if(all(c("SI_LATI","SI_LONG") %in% colnames(x))){ - #- Get left lower point from ICES rectangle in gps format - x$LE_RECT <- ICESrectangle(x) - - #- Limit the spatial boundaries of tacsat - idxx <- which(x$SI_LONG >= xlim[1] & x$SI_LONG 
<= xlim[2]) - idxy <- which(x$SI_LATI >= ylim[1] & x$SI_LATI <= ylim[2]) - x <- x[idxx[which(idxx %in% idxy)],] - x$SI_LONG <- af(ac(x$SI_LONG)) - x$SI_LATI <- af(ac(x$SI_LATI)) - if(is.null(control.tacsat$clm)==TRUE){ - control.tacsat$clm <- "idx" - x$idx <- 1:nrow(x) - } - - #--------------------------------------------------------------------------- - #- Sum by rectangle - #--------------------------------------------------------------------------- - if(level == "ICESrectangle"){ - x$SI_LONG <- af(ICESrectangle2LonLat(ac(x$LE_RECT))[,2]) - x$SI_LATI <- af(ICESrectangle2LonLat(ac(x$LE_RECT))[,1]) - - DT <- data.table(x) - eq1 <- c.listquote(paste("sum(",control.tacsat$clm,",na.rm=TRUE)",sep="")) - eq2 <- c.listquote(c("SI_LONG","SI_LATI")) - - byRect <- data.frame(DT[,eval(eq1),by=eval(eq2)]); colnames(byRect) <- c("SI_LONG","SI_LATI",control.tacsat$clm) - byRect <- byRect[which(is.na(byRect$SI_LONG)==FALSE & is.na(byRect$SI_LATI) == FALSE),] - if(length(control.tacsat$clm)>1) rangeRect <- range(apply(byRect[,control.tacsat$clm],1,sum,na.rm=TRUE)) - if(length(control.tacsat$clm)==1) rangeRect <- range(byRect[,control.tacsat$clm],na.rm=TRUE) - rangeRect <- c(0,rangeRect[2]) - } - #--------------------------------------------------------------------------- - #- Sum by grid cell - #--------------------------------------------------------------------------- - if(level == "gridcell"){ - grids <- createGrid(xlim,ylim,gridcell[1],gridcell[2],type="SpatialPixelsDataFrame") - coords <- SpatialPointsDataFrame(cbind(x=an(ac(x$SI_LONG)),y=an(ac(x$SI_LATI))),data=x) - coords@data$dens <- over(as(coords,"SpatialPoints"), as(grids,"SpatialPixels")) - - #- Sum by gridcell - DT <- data.table(data.frame(coords)) - DT$x <- af(ac(grids@coords[DT$dens,1])) - DT$y <- af(ac(grids@coords[DT$dens,2])) - - eq1 <- c.listquote(paste("sum(",control.tacsat$clm,",na.rm=TRUE)",sep="")) - eq2 <- c.listquote(c("x","y")) - - byRect <- data.frame(DT[,eval(eq1),by=eval(eq2)]); colnames(byRect) <- 
c("SI_LONG","SI_LATI",control.tacsat$clm) - byRect$SI_LONG <- signif(anf(byRect$SI_LONG)) - byRect$SI_LATI <- signif(anf(byRect$SI_LATI)) - if(length(control.tacsat$clm)>1) rangeRect <- range(apply(byRect[,control.tacsat$clm],1,sum,na.rm=TRUE)) - if(length(control.tacsat$clm)==1) rangeRect <- range(byRect[,control.tacsat$clm],na.rm=TRUE) - rangeRect <- c(0,rangeRect[2]) - } - ctrl <- control.tacsat - } - - - #EFLALO - if(all(c("VE_FLT","VE_KW") %in% colnames(x))){ - if(level != "ICESrectangle") stop("Data supplied is 'eflalo' which only has ICESrectangle") - x$SI_LONG <- ICESrectangle2LonLat(ac(x$LE_RECT))[,2] - x$SI_LATI <- ICESrectangle2LonLat(ac(x$LE_RECT))[,1] - - #- Limit the spatial boundaries of eflalo - idxx <- which(x$SI_LONG >= xlim[1] & x$SI_LONG <= xlim[2]) - idxy <- which(x$SI_LATI >= ylim[1] & x$SI_LATI <= ylim[2]) - x <- x[idxx[which(idxx %in% idxy)],] - - #- Sum by rectangle - if(is.null(control.eflalo$clm)==TRUE) control.eflalo$clm <- colnames(x[,kgeur(colnames(x))]) - DT <- data.table(x) - eq1 <- c.listquote(paste("sum(",control.eflalo$clm,",na.rm=TRUE)",sep="")) - eq2 <- c.listquote(c("SI_LONG","SI_LATI")) - DT$SI_LONG <- af(ac(DT$SI_LONG)); DT$SI_LATI <- af(ac(DT$SI_LATI)) - - byRect <- data.frame(DT[,eval(eq1),by=eval(eq2)]); colnames(byRect) <- c("SI_LONG","SI_LATI",control.eflalo$clm) - byRect <- byRect[which(is.na(byRect$SI_LONG)==FALSE & is.na(byRect$SI_LATI) == FALSE),] - if(length(control.eflalo$clm)>1) rangeRect <- range(apply(byRect[,control.eflalo$clm],1,sum,na.rm=TRUE)) - if(length(control.eflalo$clm)==1) rangeRect <- range(byRect[,control.eflalo$clm],na.rm=TRUE) - rangeRect <- c(0,rangeRect[2]) - ctrl <- control.eflalo - } - - map("worldHires",resolution=1,xlim=xlim,ylim=ylim,fill=TRUE,col="darkgreen");axis(1,las=las);axis(2,las=las);box() - for(iRect in 1:nrow(byRect)){ - if(log){ - if(is.null(zlim)==TRUE){ i <- round((log(sum(byRect[iRect,ctrl$clm],na.rm=TRUE))-ifelse(rangeRect[1]==0,0,log(rangeRect[1]))) - /(log(rangeRect[2]) - 
ifelse(rangeRect[1]==0,0,log(rangeRect[1])))*(length(color)-1)) +1 - } else { - i <- round((log(sum(byRect[iRect,ctrl$clm],na.rm=TRUE))-ifelse(zlim[1]==0,0,log(zlim[1]))) - /(log(zlim[2]) - ifelse(zlim[1]==0,0,log(zlim[1]))) *(length(color)-1)) +1 - } - } else { - if(is.null(zlim)==TRUE){ i <- round((sum(byRect[iRect,ctrl$clm],na.rm=TRUE)-ifelse(rangeRect[1]==0,0,rangeRect[1])) - /(rangeRect[2] - ifelse(rangeRect[1]==0,0,rangeRect[1])) *(length(color)-1)) +1 - } else { - i <- round((sum(byRect[iRect,ctrl$clm],na.rm=TRUE)-ifelse(zlim[1]==0,0,zlim[1])) - /(zlim[2] - ifelse(zlim[1]==0,0,zlim[1])) *(length(color)-1)) +1 - } - } - if(level == "ICESrectangle") polygon(x=c(an(ac(byRect[iRect,"SI_LONG"])),an(ac(byRect[iRect,"SI_LONG"]))+1,an(ac(byRect[iRect,"SI_LONG"]))+1,an(ac(byRect[iRect,"SI_LONG"]))), - y=c(rep(an(ac(byRect[iRect,"SI_LATI"])),2),rep(an(ac(byRect[iRect,"SI_LATI"]))+0.5,2)),col=color[i],lwd=1,border=NA) - if(level == "gridcell") polygon(x=c(an(ac(byRect[iRect,"SI_LONG"]))-gridcell[1]/2,an(ac(byRect[iRect,"SI_LONG"]))+gridcell[1]/2,an(ac(byRect[iRect,"SI_LONG"]))+gridcell[1]/2,an(ac(byRect[iRect,"SI_LONG"]))-gridcell[1]/2), - y=c(rep(an(ac(byRect[iRect,"SI_LATI"])),2)-gridcell[2]/2,rep(an(ac(byRect[iRect,"SI_LATI"]))+gridcell[2]/2,2)),col=color[i],lwd=1,border=NA) - } - map("worldHires",resolution=1,xlim=xlim,ylim=ylim,fill=TRUE,col="darkgreen",plt=FALSE,add=TRUE);box() - if(returnRange) return(rangeRect) -} \ No newline at end of file +#' Plot eflalo or tacsat files +#' +#' Plot eflalo or tacsat given a grid and column name as a map of intensity +#' +#' +#' @param x Eflalo or tacsat dataframe +#' @param level Aggregating level: "ICESrectangle" or specified "gridcell" +#' between xlim and ylim in steps gricell for tacsat or "ICESrectangle" only +#' for eflalo +#' @param xlim two element numeric vector giving a range of longitudes, +#' expressed in degrees, to which drawing should be restricted. 
Longitude is +#' measured in degrees east of Greenwich, so that, in particular, locations in +#' the USA have negative longitude. If fill = TRUE, polygons selected by region +#' must be entirely inside the xlim range. The default value of this argument +#' spans the entire longitude range of the database. +#' @param ylim two element numeric vector giving a range of latitudes, +#' expressed in degrees, to which drawing should be restricted. Latitude is +#' measured in degrees north of the equator, so that, in particular, locations +#' in the USA have positive latitude. If fill = TRUE, polygons selected by +#' region must be entirely inside the ylim range. The default value of this +#' argument spans the entire latitude range of the database. +#' @param zlim the minimum and maximum z values for which colors should be +#' plotted, defaulting to the range of the finite values of z. Each of the +#' given colors will be used to color an equispaced interval of this range. The +#' midpoints of the intervals cover the range, so that values just outside the +#' range will be plotted. +#' @param log whether values to plot need to be logged, TRUE or FALSE +#' @param gridcell two element numeric vector giving the steps in longitudinal +#' degrees and steps in latitudinal degrees for the grid to plot on. Only +#' needed when level = "gridcell" +#' @param color colors range. default = brewer.pal(9,"YlOrRd") +#' @param control.tacsat list with attribute "clm": column names to display +#' aggregated results over +#' @param control.eflalo list with attribute "clm": column names to display +#' aggregated results over +#' @param returnRange Logical: return range of plotted values (which can be +#' used to define own legend +#' @param las Direction of axis legends. See ?par for more info +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{plotTreeMap}}, \code{\link{Grid2KLM}}, +#' \code{\link{landingsMap2GIFanim}}, \code{\link{pings2EffortMaps}}, +#' \code{\link{pings2LandingsMaps}} +#' @examples +#' +#' data(tacsat) +#' data(eflalo) +#' +#' plotTools(tacsat,level="ICESrectangle",xlim=c(-5,10),ylim=c(48,62),zlim=NULL, +#' log=FALSE,gridcell=c(0.1,0.05),color=NULL,control.tacsat=list(clm=NULL)) +#' x11() +#' plotTools(eflalo,level="ICESrectangle",xlim=c(-5,10),ylim=c(48,62),zlim=NULL, +#' log=FALSE,gridcell=c(0.1,0.05),color=NULL,control.tacsat=list(clm=NULL)) +#' x11() +#' plotTools(tacsat,level="gridcell",xlim=c(-5,10),ylim=c(48,62),zlim=NULL, +#' log=FALSE,gridcell=c(0.1,0.05),color=NULL,control.tacsat=list(clm=NULL)) +#' +#' x11() +#' plotTools(eflalo,level="ICESrectangle",xlim=c(-5,10),ylim=c(48,62),zlim=NULL, +#' log=FALSE,gridcell=c(0.1,0.05),color=NULL,control.tacsat=list(clm=NULL), +#' control.eflalo=list(clm=c("LE_KG_COD","LE_KG_PLE"))) +#' +#' +#' @export plotTools +plotTools <- function(x,level="ICESrectangle",xlim,ylim,zlim=NULL,log=FALSE,gridcell=c(0.1,0.05),color=NULL,control.tacsat=list(clm=NULL),control.eflalo=list(clm=NULL),returnRange=FALSE,las=1){ + require(maps) + require(mapdata) + if(is.null(color)==TRUE) color <- rev(heat.colors(9)) + + #TACSAT + if(all(c("SI_LATI","SI_LONG") %in% colnames(x))){ + #- Get left lower point from ICES rectangle in gps format + x$LE_RECT <- ICESrectangle(x) + + #- Limit the spatial boundaries of tacsat + idxx <- which(x$SI_LONG >= xlim[1] & x$SI_LONG <= xlim[2]) + idxy <- which(x$SI_LATI >= ylim[1] & x$SI_LATI <= ylim[2]) + x <- x[idxx[which(idxx %in% idxy)],] + x$SI_LONG <- af(ac(x$SI_LONG)) + x$SI_LATI <- af(ac(x$SI_LATI)) + if(is.null(control.tacsat$clm)==TRUE){ + control.tacsat$clm <- "idx" + x$idx <- 1:nrow(x) + } + + #--------------------------------------------------------------------------- + #- Sum by rectangle + #--------------------------------------------------------------------------- + if(level == 
"ICESrectangle"){ + x$SI_LONG <- af(ICESrectangle2LonLat(ac(x$LE_RECT))[,2]) + x$SI_LATI <- af(ICESrectangle2LonLat(ac(x$LE_RECT))[,1]) + + DT <- data.table(x) + eq1 <- c.listquote(paste("sum(",control.tacsat$clm,",na.rm=TRUE)",sep="")) + eq2 <- c.listquote(c("SI_LONG","SI_LATI")) + + byRect <- data.frame(DT[,eval(eq1),by=eval(eq2)]); colnames(byRect) <- c("SI_LONG","SI_LATI",control.tacsat$clm) + byRect <- byRect[which(is.na(byRect$SI_LONG)==FALSE & is.na(byRect$SI_LATI) == FALSE),] + if(length(control.tacsat$clm)>1) rangeRect <- range(apply(byRect[,control.tacsat$clm],1,sum,na.rm=TRUE)) + if(length(control.tacsat$clm)==1) rangeRect <- range(byRect[,control.tacsat$clm],na.rm=TRUE) + rangeRect <- c(0,rangeRect[2]) + } + #--------------------------------------------------------------------------- + #- Sum by grid cell + #--------------------------------------------------------------------------- + if(level == "gridcell"){ + grids <- createGrid(xlim,ylim,gridcell[1],gridcell[2],type="SpatialPixelsDataFrame") + coords <- SpatialPointsDataFrame(cbind(x=an(ac(x$SI_LONG)),y=an(ac(x$SI_LATI))),data=x) + coords@data$dens <- over(as(coords,"SpatialPoints"), as(grids,"SpatialPixels")) + + #- Sum by gridcell + DT <- data.table(data.frame(coords)) + DT$x <- af(ac(grids@coords[DT$dens,1])) + DT$y <- af(ac(grids@coords[DT$dens,2])) + + eq1 <- c.listquote(paste("sum(",control.tacsat$clm,",na.rm=TRUE)",sep="")) + eq2 <- c.listquote(c("x","y")) + + byRect <- data.frame(DT[,eval(eq1),by=eval(eq2)]); colnames(byRect) <- c("SI_LONG","SI_LATI",control.tacsat$clm) + byRect$SI_LONG <- signif(anf(byRect$SI_LONG)) + byRect$SI_LATI <- signif(anf(byRect$SI_LATI)) + if(length(control.tacsat$clm)>1) rangeRect <- range(apply(byRect[,control.tacsat$clm],1,sum,na.rm=TRUE)) + if(length(control.tacsat$clm)==1) rangeRect <- range(byRect[,control.tacsat$clm],na.rm=TRUE) + rangeRect <- c(0,rangeRect[2]) + } + ctrl <- control.tacsat + } + + + #EFLALO + if(all(c("VE_FLT","VE_KW") %in% colnames(x))){ + 
if(level != "ICESrectangle") stop("Data supplied is 'eflalo' which only has ICESrectangle") + x$SI_LONG <- ICESrectangle2LonLat(ac(x$LE_RECT))[,2] + x$SI_LATI <- ICESrectangle2LonLat(ac(x$LE_RECT))[,1] + + #- Limit the spatial boundaries of eflalo + idxx <- which(x$SI_LONG >= xlim[1] & x$SI_LONG <= xlim[2]) + idxy <- which(x$SI_LATI >= ylim[1] & x$SI_LATI <= ylim[2]) + x <- x[idxx[which(idxx %in% idxy)],] + + #- Sum by rectangle + if(is.null(control.eflalo$clm)==TRUE) control.eflalo$clm <- colnames(x[,kgeur(colnames(x))]) + DT <- data.table(x) + eq1 <- c.listquote(paste("sum(",control.eflalo$clm,",na.rm=TRUE)",sep="")) + eq2 <- c.listquote(c("SI_LONG","SI_LATI")) + DT$SI_LONG <- af(ac(DT$SI_LONG)); DT$SI_LATI <- af(ac(DT$SI_LATI)) + + byRect <- data.frame(DT[,eval(eq1),by=eval(eq2)]); colnames(byRect) <- c("SI_LONG","SI_LATI",control.eflalo$clm) + byRect <- byRect[which(is.na(byRect$SI_LONG)==FALSE & is.na(byRect$SI_LATI) == FALSE),] + if(length(control.eflalo$clm)>1) rangeRect <- range(apply(byRect[,control.eflalo$clm],1,sum,na.rm=TRUE)) + if(length(control.eflalo$clm)==1) rangeRect <- range(byRect[,control.eflalo$clm],na.rm=TRUE) + rangeRect <- c(0,rangeRect[2]) + ctrl <- control.eflalo + } + + map("worldHires",resolution=1,xlim=xlim,ylim=ylim,fill=TRUE,col="darkgreen");axis(1,las=las);axis(2,las=las);box() + for(iRect in 1:nrow(byRect)){ + if(log){ + if(is.null(zlim)==TRUE){ i <- round((log(sum(byRect[iRect,ctrl$clm],na.rm=TRUE))-ifelse(rangeRect[1]==0,0,log(rangeRect[1]))) + /(log(rangeRect[2]) - ifelse(rangeRect[1]==0,0,log(rangeRect[1])))*(length(color)-1)) +1 + } else { + i <- round((log(sum(byRect[iRect,ctrl$clm],na.rm=TRUE))-ifelse(zlim[1]==0,0,log(zlim[1]))) + /(log(zlim[2]) - ifelse(zlim[1]==0,0,log(zlim[1]))) *(length(color)-1)) +1 + } + } else { + if(is.null(zlim)==TRUE){ i <- round((sum(byRect[iRect,ctrl$clm],na.rm=TRUE)-ifelse(rangeRect[1]==0,0,rangeRect[1])) + /(rangeRect[2] - ifelse(rangeRect[1]==0,0,rangeRect[1])) *(length(color)-1)) +1 + } else { 
+ i <- round((sum(byRect[iRect,ctrl$clm],na.rm=TRUE)-ifelse(zlim[1]==0,0,zlim[1])) + /(zlim[2] - ifelse(zlim[1]==0,0,zlim[1])) *(length(color)-1)) +1 + } + } + if(level == "ICESrectangle") polygon(x=c(an(ac(byRect[iRect,"SI_LONG"])),an(ac(byRect[iRect,"SI_LONG"]))+1,an(ac(byRect[iRect,"SI_LONG"]))+1,an(ac(byRect[iRect,"SI_LONG"]))), + y=c(rep(an(ac(byRect[iRect,"SI_LATI"])),2),rep(an(ac(byRect[iRect,"SI_LATI"]))+0.5,2)),col=color[i],lwd=1,border=NA) + if(level == "gridcell") polygon(x=c(an(ac(byRect[iRect,"SI_LONG"]))-gridcell[1]/2,an(ac(byRect[iRect,"SI_LONG"]))+gridcell[1]/2,an(ac(byRect[iRect,"SI_LONG"]))+gridcell[1]/2,an(ac(byRect[iRect,"SI_LONG"]))-gridcell[1]/2), + y=c(rep(an(ac(byRect[iRect,"SI_LATI"])),2)-gridcell[2]/2,rep(an(ac(byRect[iRect,"SI_LATI"]))+gridcell[2]/2,2)),col=color[i],lwd=1,border=NA) + } + map("worldHires",resolution=1,xlim=xlim,ylim=ylim,fill=TRUE,col="darkgreen",plt=FALSE,add=TRUE);box() + if(returnRange) return(rangeRect) +} diff --git a/vmstools/R/plotTreeMap.r b/vmstools/R/plotTreeMap.r index cb27d31..192b7c9 100644 --- a/vmstools/R/plotTreeMap.r +++ b/vmstools/R/plotTreeMap.r @@ -1,141 +1,193 @@ - -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -# F.Bastardie -# using stolen code on the net -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!## - -plotTreeMap <- -function (x, gridcell = c(0.1, 0.1), gear = "OTB", xlim = c(-1, - 17), ylim = c(52, 62), acolors = rainbow(7), species.to.keep = c("LE_KG_COD", - "LE_KG_NEP", "LE_KG_PLE", "LE_KG_SOL")) -{ - chop <- function(x) rev(rev(x)[-1]) - simple.hook <- function(z, xl, yl, xu, yu) { - rect(xl, yl, xu, yu, col = as.character(z$one), border = NA) - } - squarified.treemap <- function(z, x = 0, y = 0, w = 1, h = 1, - hook) { - cz <- cumsum(z$size)/sum(z$size) - n <- which.min(abs(log(max(w/h, h/w) * sum(z$size) * - cz^2/z$size))) - more <- n < length(z$size) - a <- c(0, cz[1:n])/cz[n] - if (h > w) { - hook(z[1:n, ], x + w * chop(a), rep(y, n), x + w * - a[-1], rep(y + h * cz[n], n)) - if (more) - Recall(z[-(1:n), ], x, y + 
h * cz[n], w, h * - (1 - cz[n]), hook) - } - else { - hook(z[1:n, ], rep(x, n), y + h * chop(a), rep(x + - w * cz[n], n), y + h * a[-1]) - if (more) - Recall(z[-(1:n), ], x + w * cz[n], y, w * (1 - - cz[n]), h, hook) - } - } - x <- x[x$SI_STATE == 1, ] - x$SI_LONG <- anf(x$SI_LONG) - x$SI_LATI <- anf(x$SI_LATI) - x <- x[!is.na(x$SI_LATI), ] - idxx <- which(x$SI_LONG >= xlim[1] & x$SI_LONG <= xlim[2]) - idxy <- which(x$SI_LATI >= ylim[1] & x$SI_LATI <= ylim[2]) - x <- x[idxx[which(idxx %in% idxy)], ] - xx <- x[x$LE_GEAR %in% gear, ] - grids <- createGrid(xrange = xlim, yrange = ylim, - gridcell[1], gridcell[2], type = "SpatialPixelsDataFrame") - coords <- SpatialPointsDataFrame(cbind(x = an(ac(xx$SI_LONG)), - y = an(ac(xx$SI_LATI))), data = xx) - coords@data$dens <- over(as(coords,"SpatialPoints"), as(grids,"SpatialPixels")) - DT <- data.table(data.frame(coords)) - DT$x <- af(ac(grids@coords[DT$dens, 1])) - DT$y <- af(ac(grids@coords[DT$dens, 2])) - idx.col <- grep("KG", names(coords)) - eq1 <- c.listquote(paste("sum(", names(coords[, idx.col]), - ",na.rm=TRUE)", sep = "")) - eq2 <- c.listquote(c("x", "y")) - byRect <- data.frame(DT[, eval(eq1), by = eval(eq2)]) - colnames(byRect) <- c("SI_LONG", "SI_LATI", names(coords)[idx.col]) - byRect$SI_LONG <- signif(anf(byRect$SI_LONG)) - byRect$SI_LATI <- signif(anf(byRect$SI_LATI)) - idx.col <- grep("KG", colnames(byRect)) - rangeRect <- range(apply(byRect[idx.col], 1, sum, na.rm = TRUE)) - rangeRect <- c(0, rangeRect[2]) - A.sum <- apply(byRect[, idx.col], 1, sum, na.rm = TRUE) - A.sum2 <- apply(byRect[, idx.col], 2, sum, na.rm = TRUE) - species.to.merge <- names(A.sum2)[!names(A.sum2) %in% species.to.keep] - byRect$LE_KG_OTH <- apply(byRect[, species.to.merge], 1, - sum, na.rm = TRUE) - byRect <- byRect[, !names(byRect) %in% species.to.merge] - idx.col <- grep("KG", names(byRect)) - byRect[, idx.col] <- sweep(byRect[, idx.col], 1, A.sum, FUN = "/") - X11(7, 7) - require(mapdata) - map("worldHires", resolution = 1, xlim = 
xlim, ylim = ylim, - fill = TRUE, col = "darkgreen") - map.axes() - box() - for (iRect in 1:nrow(byRect)) { - x1 <- an(ac(byRect[iRect, "SI_LONG"])) - y1 <- an(ac(byRect[iRect, "SI_LATI"])) - size <- an(ac(byRect[iRect, idx.col])) - size <- replace(size, is.na(size) | size <= 0, 1e-04) ## DEBUG: NO NEGATIVE size ALLOWED - z <- data.frame(size = size, one = acolors[1:((1 + length(idx.col)) - - 1)]) - z <- z[order(-z$size), ] - print(z) - squarified.treemap(z, x = x1, y = y1, w = gridcell[1], - h = gridcell[2], hook = simple.hook) - } - for (i in seq(xlim[1], xlim[2], by = gridcell[1])) abline(v = i, - col = grey(0.9)) - for (i in seq(ylim[1], ylim[2], by = gridcell[2])) abline(h = i, - col = grey(0.9)) - map("worldHires", add = TRUE, resolution = 1, xlim = xlim, - ylim = ylim, fill = TRUE, col = "darkgreen") - map.axes() - box() - legend("topright", legend = gsub('LE_KG_','', names(byRect[, idx.col])), fill = acolors[1:((1 + - length(idx.col)) - 1)], bg = "white") - - - - return() -} - - -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## -##!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!## - - - ## do mergeEflalo2Pings().... 
##!!!!!!!!!!!!!!!!!!!!!!!!!!!!##
# F.Bastardie
# using stolen code on the net
##!!!!!!!!!!!!!!!!!!!!!!!!!!!!##


#' Plot a squarified treemap of landing proportion per cell.
#'
#' Plot a squarified treemap of landing proportion per cell from the merged
#' VMS/logbooks tables.
#'
#' to be further customized...
#'
#' @param x a merged data.frame such as the one produced by mergeEflalo2Pings()
#' @param gridcell grid cell resolution in degree, c(lon step, lat step)
#' @param gear subset the data according to a (set of) gear(s)
#' @param xlim longitude range
#' @param ylim latitude range
#' @param acolors should be at least same of length of the species.to.keep
#'   plus one (an extra colour is used for the pooled 'LE_KG_OTH' category)
#' @param species.to.keep set of eflalo names for species (the same for
#' plotting cash value not implemented yet.)
#' @return called for its plotting side effect; returns NULL
#' @author Francois Bastardie
#' @references EU Lot 2 project
#' @examples
#'
#' \dontrun{
#' # do mergeEflalo2Pings()....
#' # ...and load the merged output table for all vessels
#' load(file.path("C:","output","all_merged__1800.RData"))
#'
#' graphics.off()
#' df1 <- all.merged[, c("LE_MET_level6", "LE_GEAR","SI_STATE", "SI_LATI",
#'                       "SI_LONG","LE_KG_COD","LE_KG_PLE")]
#' df1$SI_LONG <- anf(df1$SI_LONG)
#' df1$SI_LATI <- anf(df1$SI_LATI)
#' df1 <- df1[ !is.na(df1$SI_LATI),]
#' df1 <- df1[ !is.na(df1$SI_LONG),]
#' df1$LE_MET <- df1$LE_MET_level6
#'
#' # call to plotTreeMap()
#' a.metier <- "TBB_DEF_70-99_0_0"
#' a.gear <-"TBB"
#' plotTreeMap (df1[df1$LE_MET_level6 %in% a.metier & df1$SI_STATE==1,],
#'    gridcell=c(0.1,0.05), gear=a.gear,
#'    xlim= c(3,6), ylim= c(50,54), acolors=rainbow(7),
#'    species.to.keep= c("LE_KG_COD","LE_KG_PLE") )
#' mtext("Latitude", 2, 3) ; mtext("Longitude",1, 2)
#' }
#'
#' @export plotTreeMap
plotTreeMap <-
function (x, gridcell = c(0.1, 0.1), gear = "OTB", xlim = c(-1,
    17), ylim = c(52, 62), acolors = rainbow(7), species.to.keep = c("LE_KG_COD",
    "LE_KG_NEP", "LE_KG_PLE", "LE_KG_SOL"))
{
    # Drop the last element of a vector (used to build rectangle corner coords).
    chop <- function(x) rev(rev(x)[-1])
    # Default drawing hook: fill each sub-rectangle with its species colour.
    simple.hook <- function(z, xl, yl, xu, yu) {
        rect(xl, yl, xu, yu, col = as.character(z$one), border = NA)
    }
    # Recursive squarified-treemap layout inside the rectangle (x, y, w, h).
    # z is a data.frame with columns 'size' and 'one' (colour), sorted by
    # decreasing size; 'hook' is called to draw each laid-out strip.
    squarified.treemap <- function(z, x = 0, y = 0, w = 1, h = 1,
        hook) {
        cz <- cumsum(z$size)/sum(z$size)
        # n = number of items placed in the current strip (best aspect ratio).
        n <- which.min(abs(log(max(w/h, h/w) * sum(z$size) *
            cz^2/z$size)))
        more <- n < length(z$size)
        a <- c(0, cz[1:n])/cz[n]
        if (h > w) {
            # Horizontal strip along the bottom; recurse into the remainder.
            hook(z[1:n, ], x + w * chop(a), rep(y, n), x + w *
                a[-1], rep(y + h * cz[n], n))
            if (more)
                Recall(z[-(1:n), ], x, y + h * cz[n], w, h *
                  (1 - cz[n]), hook)
        }
        else {
            # Vertical strip along the left side; recurse into the remainder.
            hook(z[1:n, ], rep(x, n), y + h * chop(a), rep(x +
                w * cz[n], n), y + h * a[-1])
            if (more)
                Recall(z[-(1:n), ], x + w * cz[n], y, w * (1 -
                  cz[n]), h, hook)
        }
    }
    # Keep fishing pings only (SI_STATE == 1) with valid coordinates
    # inside the requested lon/lat window.
    x <- x[x$SI_STATE == 1, ]
    x$SI_LONG <- anf(x$SI_LONG)
    x$SI_LATI <- anf(x$SI_LATI)
    x <- x[!is.na(x$SI_LATI), ]
    idxx <- which(x$SI_LONG >= xlim[1] & x$SI_LONG <= xlim[2])
    idxy <- which(x$SI_LATI >= ylim[1] & x$SI_LATI <= ylim[2])
    x <- x[idxx[which(idxx %in% idxy)], ]
    # Subset to the requested gear(s).
    xx <- x[x$LE_GEAR %in% gear, ]
    # Build the regular grid and assign each ping to a grid pixel;
    # 'dens' holds the index of the matching pixel in grids@coords.
    grids <- createGrid(xrange = xlim, yrange = ylim,
        gridcell[1], gridcell[2], type = "SpatialPixelsDataFrame")
    coords <- SpatialPointsDataFrame(cbind(x = an(ac(xx$SI_LONG)),
        y = an(ac(xx$SI_LATI))), data = xx)
    coords@data$dens <- over(as(coords,"SpatialPoints"), as(grids,"SpatialPixels"))
    DT <- data.table(data.frame(coords))
    DT$x <- af(ac(grids@coords[DT$dens, 1]))
    DT$y <- af(ac(grids@coords[DT$dens, 2]))
    # Sum each LE_KG_* column per grid cell via a data.table grouped query
    # built from quoted expressions (c.listquote).
    idx.col <- grep("KG", names(coords))
    eq1 <- c.listquote(paste("sum(", names(coords[, idx.col]),
        ",na.rm=TRUE)", sep = ""))
    eq2 <- c.listquote(c("x", "y"))
    byRect <- data.frame(DT[, eval(eq1), by = eval(eq2)])
    colnames(byRect) <- c("SI_LONG", "SI_LATI", names(coords)[idx.col])
    byRect$SI_LONG <- signif(anf(byRect$SI_LONG))
    byRect$SI_LATI <- signif(anf(byRect$SI_LATI))
    idx.col <- grep("KG", colnames(byRect))
    rangeRect <- range(apply(byRect[idx.col], 1, sum, na.rm = TRUE))
    rangeRect <- c(0, rangeRect[2])
    # Pool every species not in species.to.keep into a single LE_KG_OTH column,
    # then convert each cell's landings to proportions of the cell total.
    A.sum <- apply(byRect[, idx.col], 1, sum, na.rm = TRUE)
    A.sum2 <- apply(byRect[, idx.col], 2, sum, na.rm = TRUE)
    species.to.merge <- names(A.sum2)[!names(A.sum2) %in% species.to.keep]
    byRect$LE_KG_OTH <- apply(byRect[, species.to.merge], 1,
        sum, na.rm = TRUE)
    byRect <- byRect[, !names(byRect) %in% species.to.merge]
    idx.col <- grep("KG", names(byRect))
    byRect[, idx.col] <- sweep(byRect[, idx.col], 1, A.sum, FUN = "/")
    # NOTE(review): X11() is platform-dependent and require() inside a function
    # is discouraged — consider dev.new() and a package-level dependency.
    X11(7, 7)
    require(mapdata)
    map("worldHires", resolution = 1, xlim = xlim, ylim = ylim,
        fill = TRUE, col = "darkgreen")
    map.axes()
    box()
    # One treemap per grid cell, anchored at the cell's lower-left corner.
    for (iRect in 1:nrow(byRect)) {
        x1 <- an(ac(byRect[iRect, "SI_LONG"]))
        y1 <- an(ac(byRect[iRect, "SI_LATI"]))
        size <- an(ac(byRect[iRect, idx.col]))
        size <- replace(size, is.na(size) | size <= 0, 1e-04) ## DEBUG: NO NEGATIVE size ALLOWED
        z <- data.frame(size = size, one = acolors[1:((1 + length(idx.col)) -
            1)])
        z <- z[order(-z$size), ]
        print(z)
        squarified.treemap(z, x = x1, y = y1, w = gridcell[1],
            h = gridcell[2], hook = simple.hook)
    }
    # Overlay the grid lines and redraw the coastline on top.
    for (i in seq(xlim[1], xlim[2], by = gridcell[1])) abline(v = i,
        col = grey(0.9))
    for (i in seq(ylim[1], ylim[2], by = gridcell[2])) abline(h = i,
        col = grey(0.9))
    map("worldHires", add = TRUE, resolution = 1, xlim = xlim,
        ylim = ylim, fill = TRUE, col = "darkgreen")
    map.axes()
    box()
    legend("topright", legend = gsub('LE_KG_','', names(byRect[, idx.col])), fill = acolors[1:((1 +
        length(idx.col)) - 1)], bg = "white")
    return()
}
#' Plot the Minimum Convex Polygon
#'
#' Draws the MCP polygon covering the geographical extent of a point set on a
#' Cartesian plane. Relies on the \code{r.MCP} object created beforehand by
#' \code{calc_mcp()}.
#'
#' @param plotnew Boolean: TRUE opens a fresh plot, FALSE overlays the current one
#' @param plotpoints Boolean: TRUE also draws the underlying point observations
#' @param points.col Colour used for the point observations
#' @param points.pch Plotting symbol used for the point observations
#' @param titletxt String placed as the plot title
#' @param xaxis Label for the x-axis
#' @param yaxis Label for the y-axis
#' @param mcp.col Line colour of the MCP outline
#' @param mcp.lwd Line width of the MCP outline
#' @param fill.col Fill colour of the MCP polygon
#' @param jpeg Boolean: TRUE writes the plot to a JPEG file named after r.MCP$id
#' @param \dots Further graphical parameters, forwarded to \code{par()}
#' @author Randy Bui, Ron N. Buliung, Tarmo K. Remmel
#' @examples
#'
#' data(tacsat)
#'
#' calc_mcp(id=1, points = tacsat[1:10,c("SI_LONG","SI_LATI")], filename="MCP_Output.txt",
#' verbose = FALSE, pct = 100)
#' plot_mcp(plotnew=TRUE, plotpoints=TRUE, titletxt="Title",
#' xaxis= "Easting (m)", yaxis="Northing (m)")
#'
#' @export plot_mcp
plot_mcp <- function (plotnew = TRUE, plotpoints = TRUE, points.col = "black",
    points.pch = 1, titletxt = "Title", xaxis = "Easting (m)",
    yaxis = "Northing (m)", mcp.col = "black", mcp.lwd = 2, fill.col = NA,
    jpeg = FALSE, ...)
{
    # Apply any extra graphical parameters before drawing anything.
    par(...)
    # Optionally redirect output to a JPEG device (closed again at the end).
    if (jpeg) {
        jpeg(filename = paste("MCP", r.MCP$id, ".jpg", sep = ""),
            width = 600, height = 600, pointsize = 12, quality = 90,
            bg = "white", res = NA)
    }
    if (plotnew) {
        # Fresh plot: axis limits come from the polygon itself.
        plot(r.MCP$MCP, xlab = xaxis, ylab = yaxis, colpol = fill.col,
            col = mcp.col, lwd = mcp.lwd)
    } else {
        # Overlay: reuse the current device's user coordinate limits so the
        # polygon lands on the existing plot without rescaling it.
        usr <- par(no.readonly = TRUE)$usr
        plot(r.MCP$MCP, xlab = xaxis, ylab = yaxis, colpol = fill.col,
            col = mcp.col, lwd = mcp.lwd, xlim = usr[1:2], ylim = usr[3:4],
            add = TRUE)
    }
    title(paste(titletxt, sep = ""))
    # Optionally draw the observations on top of the polygon.
    if (plotpoints) {
        points(r.MCP$points, col = points.col, pch = points.pch)
    }
    if (jpeg) {
        dev.off()
    }
}
#' Find points in harbour within specified range
#'
#' Method to find the gps positions given with tacsat data that are situated
#' within a range of a port.
#'
#' The method returns the index of points that are within a harbour area, given
#' the midpoints of the harbours and a range (in km) from these midpoints.
#'
#' @param lon Longitudinal positions of the TACSAT formatted data
#' @param lat Latitudinal positions of the TACSAT formatted data
#' @param harbours Latitudinal and Longitudinal position of the harbour and
#' outer range (km) from midpoint of harbour
#' @param rowSize Number of points processed per chunk (bounding-box
#' pre-filtering is done per chunk). Defaults to 30
#' @param returnNames Logical: return the name of the harbour instead of 1 / 0
#' indicating if it is inside or outside the harbour. Default to FALSE
#' @param saveHarbourList Logical: writing harbour list used in function to
#' file. Default to TRUE
#' @return Vector of the same length as lon: 1/0 flags, or harbour names with
#' NA outside harbours when returnNames=TRUE
#' @author Niels T. Hintzen
#' @seealso \code{\link{distance}}, \code{\link{lonLatRatio}},
#' \code{\link{sortTacsat}}, \code{\link{filterTacsat}},
#' \code{\link{mergeEflalo2Tacsat}}
#' @references EU lot 2 project
#' @examples
#'
#' data(eflalo)
#' data(tacsat)
#' data(euharbours); euharbours <- harbours
#'
#' #-Remove duplicated records from tacsat
#' myf <- paste(tacsat$VE_REF,tacsat$SI_LATI,tacsat$SI_LONG,
#' tacsat$SI_DATE,tacsat$SI_TIME);
#' tacsat <- tacsat[!duplicated(myf),];
#'
#' #-Find all the gps locations that are located within the port area
#' idx <- pointInHarbour(lon=tacsat$SI_LONG,lat=tacsat$SI_LATI,
#' harbours=harbours,returnNames=TRUE)
#' print(head(idx))
#' getwd() #in this directory, the harbour list will be written to disk
#' idx <- pointInHarbour(lon=tacsat$SI_LONG,lat=tacsat$SI_LATI,
#' harbours=harbours,saveHarbourList=TRUE)
#'
#' idx <- pointInHarbour(lon=tacsat$SI_LONG,lat=tacsat$SI_LATI,
#' harbours=harbours)
#' idx <- which(idx==1)
#'
#' #-Plot these port locations on a map
#' library(maps); library(mapdata)
#' #map the world, but plot only the northsea by lon and lat limits,
#' # in high resolution
#' xrange <- range(tacsat$SI_LONG[idx])
#' yrange <- range(tacsat$SI_LATI[idx])
#'
#' map('worldHires',xlim=xrange,ylim=yrange,col="darkgreen",fill=TRUE,
#' resolution=1, bg="white", border=0)
#' map.axes();
#' mtext("Longitude",1,line=-2,outer=TRUE,cex=1.2,font=2)
#' mtext("Latitude",2,line=-2,outer=TRUE,cex=1.2,font=2)
#'
#' points(tacsat$SI_LONG[idx],tacsat$SI_LATI[idx],cex=0.1,pch=16,col="red")
#'
#' @export pointInHarbour
pointInHarbour <- function(lon,lat,harbours,rowSize=30, returnNames=FALSE,saveHarbourList=TRUE){

    # Harbour midpoints and their detection radius (km), as a sortable matrix.
    xharb     <- harbours$lon
    yharb     <- harbours$lat
    rharb     <- harbours$range
    harb      <- cbind(xharb,yharb,rharb)
    # Harbour names become row names; 'harbour' wins over 'Description' if both exist.
    if("Description" %in% colnames(harbours)) rownames(harb) <- harbours$Description
    if("harbour" %in% colnames(harbours)) rownames(harb) <- harbours$harbour
    harb      <- orderBy(~xharb+yharb,data=harb)

    # Sort the pings by lon then lat; 'ordxys' is kept to restore the
    # original ordering at the end.
    xys       <- data.frame(lon,lat)
    ordxys    <- order(xys$lon,xys$lat)
    lon       <- lon[ordxys]
    lat       <- lat[ordxys]

    # Process the sorted pings in chunks of 'rowSize'; per chunk only the
    # harbours inside the chunk's (padded) bounding box are distance-checked.
    nChunks   <- ceiling(length(lon)/rowSize)
    store     <- rep(0, length(lon))
    for(chunks in 1:nChunks){
      # The final chunk runs to the end of the vector instead of a full rowSize.
      if(chunks == nChunks){
        x1 <- lon[(chunks*rowSize-rowSize+1):length(lon)]
        y1 <- lat[(chunks*rowSize-rowSize+1):length(lon)]
      } else {
        x1 <- lon[(chunks*rowSize-rowSize+1):(chunks*rowSize)]
        y1 <- lat[(chunks*rowSize-rowSize+1):(chunks*rowSize)]
      }

      # Bounding box of the chunk, padded by 0.05 degree on each side.
      xr <- range(x1,na.rm=TRUE); xr <- c(xr[1]-0.05,xr[2]+0.05)
      yr <- range(y1,na.rm=TRUE); yr <- c(yr[1]-0.05,yr[2]+0.05)
      res1 <- which(harb[,"xharb"] >= xr[1] & harb[,"xharb"] <= xr[2])
      res2 <- which(harb[,"yharb"] >= yr[1] & harb[,"yharb"] <= yr[2])
      # res3 = harbours matching both the lon and the lat window.
      res3 <- res1[which(is.na(pmatch(res1,res2))==FALSE)]

      if(length(res3)>0){
        for(hars in res3){
          #print(hars)
          x2 <- harb[hars,"xharb"]
          y2 <- harb[hars,"yharb"]

          # Haversine great-circle distance (km) from every ping in the
          # chunk to this harbour midpoint; R = Earth radius in km.
          pd <- pi/180

          a1 <- sin(((y2-y1)*pd)/2)
          a2 <- cos(y1*pd)
          a3 <- cos(y2*pd)
          a4 <- sin(((x2-x1)*pd)/2)
          a <- a1*a1+a2*a3*a4*a4

          c <- 2*atan2(sqrt(a),sqrt(1-a));
          R <- 6371;
          dx1 <- R*c

          # Flag pings within this harbour's radius.
          res <- numeric(length(x1))
          idx <- which(dx1<=harb[hars,"rharb"])
          res[idx] <- 1
          if(returnNames){
            res[idx] <- rownames(harb)[hars] # overwrite '1' with the port names
            # Only fill positions still "0" so the first matching harbour wins.
            if(chunks==nChunks){
              idx2 <- idx[which(store[(chunks*rowSize-rowSize+1):length(lon)][idx] == "0")]
              store[(chunks*rowSize-rowSize+1):length(lon)][idx2] <- res[idx2]
            } else {
              idx2 <- idx[which(store[(chunks*rowSize-rowSize+1):(chunks*rowSize)][idx] == "0")]
              store[(chunks*rowSize-rowSize+1):(chunks*rowSize)][idx2] <- res[idx2]
            }
          } else {
            # Numeric mode: accumulate hits (clamped to 1 after the loop).
            if(chunks==nChunks){
              store[(chunks*rowSize-rowSize+1):length(lon)] <- store[(chunks*rowSize-rowSize+1):length(lon)]+res
            } else {
              store[(chunks*rowSize-rowSize+1):(chunks*rowSize)] <- store[(chunks*rowSize-rowSize+1):(chunks*rowSize)]+res
            }
          }
        }
      }
    }
    if(returnNames == FALSE) store[which(store>0)] <- 1
    #Get order in tacsat back: sorted element i belongs at original position ordxys[i]
    store[ordxys] <- store

    if(returnNames) store <- replace(store, store=="0", NA)
    if(saveHarbourList) write.table(harbours,file="harbourList_pointInHarbour.txt",append=FALSE,sep="\t")

return(store)}
#' Find points on land given a set of coordinates
#'
#' Find the points that are on land given a set of coordinates and polygons
#' that determine the part that is land
#'
#' With many coordinates, the checking might take longer.
#'
#' @param tacsat Tacsat file (data.frame with SI_LONG and SI_LATI columns)
#' @param lands Polygon of area that is considered to be land; must be (or
#' extend) class 'SpatialPolygons' (e.g. a SpatialPolygonsDataFrame)
#' @param proj4string Projection string, default to NULL.
#' @return Returns a vector with values 0 and 1. 1 indicating points on land, 0
#' indicating points not on land.
#' @author Niels T. Hintzen
#' @seealso \code{\link{pointInHarbour}}
#' @references EU Lot 2 project
#' @examples
#'
#'
#' data(tacsat)
#' data(europa)
#' tacsat <- tacsat[1:1000,]
#' tacsat <- sortTacsat(tacsat)
#'
#' pols <- lonLat2SpatialPolygons(lst=lapply(as.list(sort(unique(europa$SID))),
#'                 function(x){data.frame(SI_LONG=subset(europa,SID==x)$X,
#'                                        SI_LATI=subset(europa,SID==x)$Y)}))
#' idx <- pointOnLand(tacsat,pols);
#' idx <- which(idx == 1)
#'
#' plotMap(europa,xlim=c(0,10),ylim=c(48,62))
#' points(tacsat$SI_LONG[idx],tacsat$SI_LATI[idx],col="red",cex=0.5,pch=19)
#'
#' @export pointOnLand
pointOnLand <- function(tacsat,lands,proj4string=NULL){
  # is() respects S4 inheritance, so subclasses such as
  # SpatialPolygonsDataFrame are accepted too (class(...) != "..." rejected them).
  if(!is(lands, "SpatialPolygons")) stop("'lands' must be specified as class 'SpatialPolygons'")

  # One 0/1 flag per tacsat record; defaults to 0 (not on land).
  totres  <- rep(0,length(tacsat$SI_LONG))

  if(is.null(proj4string)){
    #No projection string used: 'lands' must be unprojected as well.
    # (proj4string(lands) here calls sp's accessor, not the NULL argument.)
    if(!is.na(proj4string(lands))) stop("Projection defined for lands, use proj4string argument in function")
    spPoint <- SpatialPoints(data.frame(x=tacsat$SI_LONG,y=tacsat$SI_LATI))
  } else {
    #Use projection string for both the polygons and the points.
    proj4string(lands) <- proj4string
    spPoint <- SpatialPoints(data.frame(x=tacsat$SI_LONG,y=tacsat$SI_LATI),
                             proj4string=proj4string)
  }
  # over() returns NA for points outside every polygon; non-NA means on land.
  idx               <- over(spPoint,lands)
  totres[!is.na(idx)] <- 1
return(totres)}
get.sp <- function (nm) unlist(lapply(strsplit(nm, split="_"), function(x) x[3])) - nm <- names(eflalo) - idx.col.w <- grep('KG', nm) # index columns weight - idx.col.v <- grep('EURO', nm) # index columns value - v <- apply(eflalo [,idx.col.v], 2, sum, na.rm=TRUE) - sp.to.merge.v <- names(v)[v < threshold.value] # e.g. threshold in euros - sp.to.merge.w <- paste('LE_KG_', get.sp(sp.to.merge.v ), sep='') - sp.to.merge.w <- sp.to.merge.w[sp.to.merge.w != paste("LE_KG_", code, sep='')] - sp.to.merge.v <- sp.to.merge.v[sp.to.merge.v != paste("LE_EURO_", code, sep='')] - OTH.w <- apply(eflalo[,sp.to.merge.w], 1, sum, na.rm=TRUE) - OTH.v <- apply(eflalo[,sp.to.merge.v], 1, sum, na.rm=TRUE) - eflalo <- eflalo [, !nm %in% c(sp.to.merge.w, sp.to.merge.v)] - eflalo[,paste("LE_KG_", code, sep='')] <- replace(eflalo[,paste("LE_KG_", code, sep='')], - is.na(eflalo[,paste("LE_KG_", code, sep='')]), 0) - eflalo[,paste("LE_KG_", code, sep='')] <- eflalo[,paste("LE_KG_", code, sep='')] + OTH.w - eflalo[,paste("LE_EURO_", code, sep='')] <- replace(eflalo[,paste("LE_EURO_", code, sep='')], - is.na(eflalo[,paste("LE_EURO_", code, sep='')]), 0) - eflalo[,paste("LE_EURO_", code, sep='')] <- eflalo[,paste("LE_EURO_", code, sep='')] + OTH.v - return(eflalo) - } - \ No newline at end of file + # reduce the size of the eflalo data by merging species + # threshold in euros + + +#' merge species in eflalo to reduce the size of data according to a threshold +#' in euros +#' +#' merge species in eflalo to reduce the size of data according to a threshold +#' in euros +#' +#' +#' @param eflalo data.frame, eflalo format +#' @param threshold.value numeric, in euro +#' @param code character, a code for the species 'other' +#' @author Francois Bastardie +#' @export poolEflaloSpecies +poolEflaloSpecies <- function(eflalo, threshold.value=3000000, code="MZZ"){ + if(!paste("LE_KG_", code, sep='') %in% colnames(eflalo)) { + eflalo [, paste("LE_KG_", code, sep='')] <- NA + eflalo [, paste("LE_EURO_", 
code, sep='')] <- NA + } + get.sp <- function (nm) unlist(lapply(strsplit(nm, split="_"), function(x) x[3])) + nm <- names(eflalo) + idx.col.w <- grep('KG', nm) # index columns weight + idx.col.v <- grep('EURO', nm) # index columns value + v <- apply(eflalo [,idx.col.v], 2, sum, na.rm=TRUE) + sp.to.merge.v <- names(v)[v < threshold.value] # e.g. threshold in euros + sp.to.merge.w <- paste('LE_KG_', get.sp(sp.to.merge.v ), sep='') + sp.to.merge.w <- sp.to.merge.w[sp.to.merge.w != paste("LE_KG_", code, sep='')] + sp.to.merge.v <- sp.to.merge.v[sp.to.merge.v != paste("LE_EURO_", code, sep='')] + OTH.w <- apply(eflalo[,sp.to.merge.w], 1, sum, na.rm=TRUE) + OTH.v <- apply(eflalo[,sp.to.merge.v], 1, sum, na.rm=TRUE) + eflalo <- eflalo [, !nm %in% c(sp.to.merge.w, sp.to.merge.v)] + eflalo[,paste("LE_KG_", code, sep='')] <- replace(eflalo[,paste("LE_KG_", code, sep='')], + is.na(eflalo[,paste("LE_KG_", code, sep='')]), 0) + eflalo[,paste("LE_KG_", code, sep='')] <- eflalo[,paste("LE_KG_", code, sep='')] + OTH.w + eflalo[,paste("LE_EURO_", code, sep='')] <- replace(eflalo[,paste("LE_EURO_", code, sep='')], + is.na(eflalo[,paste("LE_EURO_", code, sep='')]), 0) + eflalo[,paste("LE_EURO_", code, sep='')] <- eflalo[,paste("LE_EURO_", code, sep='')] + OTH.v + return(eflalo) + } + diff --git a/vmstools/R/predictMetier.r b/vmstools/R/predictMetier.r index 1e1aab3..3fcdd64 100644 --- a/vmstools/R/predictMetier.r +++ b/vmstools/R/predictMetier.r @@ -1,55 +1,131 @@ -predictMetier=function(learningData=Step1,clustersAffectation=clust2007,newData=datPred){ - - # Select only the columns of newData corresponding to species of learningData - namesSpecies=colnames(learningData) - le_id_newData=rownames(newData) - newData=newData[,namesSpecies] - # Transform quantities of newData to percentages of each logevent totale catch - newData=transformation_proportion(newData) - - # Select the logevents without catch for the selected species - nullCatch=which(apply(newData,1,sum)==0) - 
#' Finding metiers for a new reduced EFLALO dataset from a reduced EFLALO
#' dataset for which the metiers are known.
#'
#' This function permits to determine the metiers of new logevents belonging to
#' a new reduced EFLALO dataset, from a reduced EFLALO dataset for which the
#' metiers are known. For example, this function permits to determine the
#' metiers of logevents realized in 2008, based on metiers of logevents
#' realized in 2007. It works with a discriminant analysis run with the
#' function fda. In the first time, the function fda calibrates a model based
#' on the dataset for which the metiers are known. In the second time, the
#' function predict is used to predict the metiers of new logevents, thanks to
#' the previous model.
#'
#'
#' @param learningData A numerical matrix with logevents as lines and species
#' as columns, with percentage values (between 0 and 100) of each species in
#' the logevent catches. Logevent ID (LE_ID) should be as row names. Typically,
#' this table will be produced from a eflalo dataset at the step 1 of the
#' metier analysis, using the function extractTableMainSpecies()
#' @param clustersAffectation An integer vector of length n, the number of
#' logevents in learningData, giving for each logevent the number of the
#' cluster to which it belongs. This vector will be produced at the step 3 of
#' the metier analysis, using the function getMetierClusters().
#' @param newData A numerical matrix with logevents as lines and species as
#' columns, with raw values of each species in the logevent catches. Logevent
#' ID (LE_ID) should be as row names.
#' @return A data frame giving the logevent ID (LE_ID) (first column) and the
#' number of the cluster to which it belongs (second column), for all logevents
#' in newData. The cluster number "0" corresponds to the unknown metier.
#' @note A number of libraries are initially called for the whole metier
#' analyses and must be installed :
#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda)
#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER,
#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark)
#' @seealso \code{\link{selectMainSpecies}},
#' \code{\link{extractTableMainSpecies}}, \code{\link{getTableAfterPCA}},
#' \code{\link{getMetierClusters}}
#' @references Development of tools for logbook and VMS data analysis. Studies
#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2
#' @examples
#'
#'
#' \dontrun{
#'
#' data(eflalo)
#'
#' # Format
#' newEflalo <- formatEflalo(eflalo)
#' newEflalo <- newEflalo[newEflalo$LE_GEAR=="OTB",]
#'
#' # Note that output plots will be sent to getwd()
#' analysisName <- "metier_analysis_OTB"
#'
#' # Prepare datPred
#' datPred <- newEflalo[,c("LE_ID",grep("EURO",names(newEflalo),value=TRUE))]
#' datPred[is.na(datPred)] <- 0
#' names(datPred)[-1] <- unlist(lapply(strsplit(names(datPred[,-1]),"_"),
#'                              function(x) x[[3]]))
#' le_id_datPred <- datPred[,"LE_ID"]
#' datPred <- datPred[,-1]
#' datPred <- as.matrix(datPred)
#' rownames(datPred) <- le_id_datPred
#'
#' if (methMetier=="hac") clustersAffectation <- Step3$clusters
#' if (methMetier=="clara") clustersAffectation <- Step3$clusters$clustering
#' if (methMetier=="kmeans") clustersAffectation <- Step3$clusters$cluster
#'
#' # Predict the metiers of the new logbook events of newData from the metiers
#' # found in Step 3
#' metierPred <- predictMetier(learningData=Step1,
#'                 clustersAffectation=clustersAffectation,newData=datPred)
#'
#' }
#'
#'
#' @export predictMetier
predictMetier=function(learningData=Step1,clustersAffectation=clust2007,newData=datPred){

  # Select only the columns of newData corresponding to species of learningData
  namesSpecies=colnames(learningData)
  le_id_newData=rownames(newData)
  newData=newData[,namesSpecies]
  # Transform quantities of newData to percentages of each logevent total catch
  newData=transformation_proportion(newData)

  # Select the logevents without catch for the selected species
  nullCatch=which(apply(newData,1,sum)==0)
  newDataWithoutCatch=newData[nullCatch,]
  # With a single null-catch row the subset above drops to a vector, so the
  # LE_ID must be recovered from the names of 'nullCatch' instead of rownames.
  if(length(nullCatch)==1){
    le_id_newDataWithoutCatch=names(nullCatch)
  }else{
    le_id_newDataWithoutCatch=rownames(newDataWithoutCatch)
  }

  # Select the logevents with catch for the selected species
  positiveCatch=setdiff(1:nrow(newData),nullCatch)
  newDataWithCatch=newData[positiveCatch,]

  # Prepare learningData for the discriminant analysis:
  # append the known cluster assignment as a factor column named "clust".
  le_id_learningData=rownames(learningData)
  nbSpeciesLearningData=ncol(learningData)   # Number of species of learningData
  nameLearningDataSpecies=colnames(learningData)
  learningData=as.data.frame(cbind(learningData,as.factor(clustersAffectation)))
  colnames(learningData)=c(colnames(learningData[1:nbSpeciesLearningData]),"clust")

  # Calibrate the model with learningData (flexible discriminant analysis, mda::fda)
  print("------------ Learning ------------")
  learning=fda(clust~.,data=learningData)

  # Predict the metier of each logevent in newDataWithCatch
  print("------------ Predicting ------------")
  result=predict(learning,newdata=newDataWithCatch)
  predictedClusters=data.frame(rownames(newDataWithCatch),result)
  dimnames(predictedClusters)[[2]]=c("LE_ID","Class")

  # Give the metier "0" (unknown metier) for the logevents in newDataWithoutCatch
  if(length(le_id_newDataWithoutCatch)!=0){
    notPredictedClusters=data.frame(le_id_newDataWithoutCatch,
        as.factor(rep(0,length(le_id_newDataWithoutCatch))))
    dimnames(notPredictedClusters)[[2]]=c("LE_ID","Class")
  }

  # Create a table linking "LE_ID" and metier for each logevent in newData
  if(length(le_id_newDataWithoutCatch)!=0){
    clustersForAllLogevents=rbind(predictedClusters,notPredictedClusters)
  }else{
    clustersForAllLogevents=predictedClusters
  }

  return(clustersForAllLogevents=clustersForAllLogevents)
}
use intervalTacsat)") - - #- Add date notation - if("SI_DAY" %in% unique(c(by))){ - eflalo$SI_DAY <- yday(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) - tacsat$SI_DAY <- yday(tacsat$SI_DATIM) - } - if("SI_YEAR" %in% unique(c(by))){ - eflalo$SI_YEAR<- year(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) - tacsat$SI_YEAR<- year(tacsat$SI_DATIM) - } - if("SI_MONTH"%in% unique(c(by))){ - eflalo$SI_MONTH<- month(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) - tacsat$SI_MONTH<- month(tacsat$SI_DATIM) - } - if("SI_WEEK"%in% unique(c(by))){ - eflalo$SI_WEEK<- week(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) - tacsat$SI_WEEK<- week(tacsat$SI_DATIM) - } - if("SI_QUARTER"%in% unique(c(by))){ - eflalo$SI_QUARTER<- quarter(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) - tacsat$SI_QUARTER<- quarter(tacsat$SI_DATIM) - } - - #- Select time levels + order these - timePos<- c("SI_DAY","SI_WEEK","SI_MONTH","SI_QUARTER","SI_YEAR") - byTime <- by[which(by %in% timePos)] - byTime <- timePos[which(timePos %in% byTime)] - - #----------------------------------------------------------------------------- - #-Add spatial location - #----------------------------------------------------------------------------- - if("LE_RECT" %in% by) - tacsat$LE_RECT <- ICESrectangle(tacsat) - if("LE_ICESAREA" %in% by){ - data(ICESareas) - tacsat$LE_AREA <- ICESarea(tacsat,ICESareas) - tacsat$LE_AREA[which(is.na(tacsat$LE_AREA)==T)] <- "OTHER" - eflonlat <- ICESrectangle2LonLat(eflalo$LE_RECT) - eflalo$LE_AREA <- ICESarea(eflonlat,ICESareas) - eflalo$LE_AREA[which(is.na(eflalo$LE_AREA)==T)] <- "OTHER" - } - - #- Select area levels + order these - areaPos<- c("LE_RECT","LE_ICESAREA") - byArea <- by[which(by %in% areaPos)] - byArea <- areaPos[which(areaPos %in% byArea)] - - #----------------------------------------------------------------------------- - #- Calculate time possible to spend per day fishing - #----------------------------------------------------------------------------- - eflalo$LE_CDATIM <- 
as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y",tz="GMT") - eflalo$INTV <- c(difftime(eflalo$FT_LDATIM,eflalo$FT_DDATIM,units="mins")) - eflalo$FT_DURDAY <- ifelse(c(difftime(as.Date(eflalo$FT_LDATIM),as.Date(eflalo$FT_DDATIM),units="hours") == 0), - c(difftime(eflalo$FT_LDATIM,eflalo$FT_DDATIM,units="mins")), - ifelse(c(difftime(as.Date(eflalo$FT_DDATIM),as.Date(eflalo$LE_CDATIM),units="hours")==0), - c(difftime(eflalo$LE_CDATIM+(60*60*24),eflalo$FT_DDATIM,units="mins")), - ifelse(c(difftime(as.Date(eflalo$FT_LDATIM),as.Date(eflalo$LE_CDATIM),units="hours")==0), - c(difftime(eflalo$FT_LDATIM,eflalo$LE_CDATIM,units="mins")), - 1440))) - # Here there is still a problem because INTVDAY is calculated for catch days only, so you miss some effort of a whole trip - eflalo$dummy <- 1 - eflalo <- merge(eflalo,aggregate(eflalo$dummy,by=list(eflalo$FT_REF,eflalo$LE_CDATIM),FUN=sum,na.rm=T),by.x=c("FT_REF","LE_CDATIM"),by.y=c("Group.1","Group.2"),all.x=T) - colnames(eflalo)[length(colnames(eflalo))] <- "NR_FT_REF" - if("SI_DAY" %in% by){ - eflalo$INTVDAY <- eflalo$FT_DURDAY / eflalo$NR_FT_REF - } else { - eflalo$INTVDAY <- eflalo$INTV / eflalo$NR_FT_REF - } - eflalo <- eflalo[,-grep("dummy",colnames(eflalo))] - eflalo <- eflalo[,-grep("FT_DURDAY",colnames(eflalo))] - eflalo <- eflalo[,-grep("NR_FT_REF",colnames(eflalo))] - - #----------------------------------------------------------------------------- - #- Check if all colums in both tacsat and eflalo are available - #----------------------------------------------------------------------------- - if(length(which(!by %in% colnames(tacsat) | !by %in% colnames(eflalo)))>0) - stop("elements specified in 'by' are not available as columns in either tacsat or eflalo") - - #- If estimated proportion of fishing column is missing - if(!"PropFish" %in% colnames(eflalo)){ - eflalo$PropFish <- 1 - } - - #- Start to aggregate and raise tacsat, but drop elements through a for loop - # First aggregation without dropping any element - 
tacsat$INTVR <- 0 - - cat("Start minutes eflalo",sum(eflalo$INTVDAY * eflalo$PropFish,na.rm=T),"\n") - cat("Start minutes tacsat",sum(tacsat$INTV,na.rm=T),"\n") - - #- Order elements by uniqueness - if(sortBy){ - if(length(by)>1) - by <- names(sort(apply(eflalo[,by],2,function(x){length(unique(x))}))) - } - - #- Drop element by element and merge - byOrig <- by - INTVR <- numeric(nrow(tacsat)) - INTVDAY <- eflalo$INTVDAY - for(j in 0:(length(by)-1)){ - if(j != 0) - by <- rev(rev(byOrig)[-c(1:j)]) - unEf <- apply(unique(t(t(eflalo[,by]))),1,paste,collapse="_") - pEf <- apply(t(t(eflalo[,by])),1,paste,collapse="_") - pTa <- apply(t(t(tacsat[,by])),1,paste,collapse="_") - cat("Raising by",by,"\n") - for(i in 1:length(unEf)){ - idxT <- which(pTa %in% unEf[i]) - idxE <- which(pEf %in% unEf[i]) - if(length(idxT)>0 & length(idxE)>0){ - INTVR[idxT] <- sum(INTVDAY[idxE] * eflalo$PropFish[idxE],na.rm=T)/sum(tacsat$INTV[idxT],na.rm=T) * tacsat$INTV[idxT] + INTVR[idxT] - INTVDAY[idxE] <- 0 - } - } - } - tacsat$INTVR <- INTVR - eflalo$INTVDAY <- INTVDAY - - cat("Final minutes tacsat",sum(tacsat$INTVR,na.rm=T),"\n") - cat("final remaing minutes eflalo",sum(eflalo$INTVDAY,na.rm=T),"\n") - -return(tacsat)} - +#' Raise the effort in tacsat to the effort available in eflalo. +#' +#' The eflalo dataset contains a total amount of effort. This total amount can +#' be set as the total amount of effort to be found in tacsat too. This +#' function raises the total amount of effort in tacsat to that of eflalo. +#' +#' The function looks also for a 'PropFish' column. This allows users to scale +#' down the total effort in eflalo according to the proportion that is assumed +#' to be fishing effort. 
This helps to raise the tacsat data where only records +#' which have SI_STATE = 1 will be used +#' +#' @param tacsat Tacsat object +#' @param eflalo Eflalo object +#' @param by By can be: "SI_DAY", "SI_WEEK", "SI_MONTH", +#' "SI_YEAR","LE_RECT","LE_ICESAREA","SI_QUARTER", +#' @param sortBy Logical. If items in 'by' should be ordered by descending +#' uniqueness (if TRUE match between tacsat and eflalo increases) +#' @author Niels T. Hintzen +#' @seealso \code{\link{splitAmongPings}}, \code{\link{estimatePropFishing}} +#' @examples +#' +#' data(tacsat) +#' data(eflalo) +#' +#' #- Calculate effort in tacsat dataset and define activity (fishing) +#' tacsatp <- mergeEflalo2Tacsat(eflalo,tacsat) +#' tacsatp <- sortTacsat(tacsatp) +#' +#' #- Define effort on a trip level +#' tacsatp <- intervalTacsat(tacsatp,level="trip",fill.na=T,weight=c(0.5,0.5)) +#' tacsatp1<- subset(tacsatp,FT_REF==0) +#' tacsatp2<- subset(tacsatp,FT_REF!=0) +#' #- Define effort on a vessel level +#' tacsatp1<- intervalTacsat(tacsatp1,level="vessel",fill.na=T,weight=c(0.5,0.5)) +#' +#' #- Combine the two datasets with effort associated +#' tacsatp <- rbind(tacsatp1,tacsatp2) +#' tacsatp$LE_GEAR <- eflalo$LE_GEAR[match(tacsatp$FT_REF,eflalo$FT_REF)] +#' +#' #- Quick and dirty approach to defin activity +#' tacsatp <- filterTacsat(tacsatp,st=c(4,8)) +#' tacsatp$INTV[which(tacsatp$INTV>232)] <- 115 +#' tacsatp$INTV[which(is.na(tacsatp$INTV)==T)] <- 115 +#' tacsat <- tacsatp +#' tacsat$SI_STATE <- 1 +#' +#' #- Estimate the proportion fishing first (not necessary) +#' eflalo <- estimatePropFishing(tacsat,eflalo,by=c("VE_REF","FT_REF")) +#' +#' #- Raise the tacsat dataset to the effort of the eflalo dataset +#' tacsat <- raiseTacsat(tacsat,eflalo,by=c("LE_GEAR","SI_DAY","SI_YEAR"),sortBy=T) +#' tacsat <- raiseTacsat(tacsat,eflalo,by=c("LE_GEAR","SI_DAY","SI_YEAR"),sortBy=F) +#' +#' +#' @export raiseTacsat +raiseTacsat <- 
function(tacsat,eflalo,by=c("LE_GEAR","VE_REF","SI_DAY","LE_RECT","SI_YEAR"),sortBy=TRUE){ + + + #----------------------------------------------------------------------------- + #- Add columns on dates + #----------------------------------------------------------------------------- + if(!"SI_DATIM" %in% colnames(tacsat)) + tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME,sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M") + if(!"FT_DDATIM" %in% colnames(eflalo)) + eflalo$FT_DDATIM <- as.POSIXct(paste(eflalo$FT_DDAT,eflalo$FT_DTIME),format="%d/%m/%Y %H:%M",tz = "GMT") + if(!"FT_LDATIM" %in% colnames(eflalo)) + eflalo$FT_LDATIM <- as.POSIXct(paste(eflalo$FT_LDAT,eflalo$FT_LTIME),format="%d/%m/%Y %H:%M",tz = "GMT") + if(!"INTV" %in% colnames(tacsat)) + stop("Specify time interval column in tacsat (e.g. use intervalTacsat)") + + #- Add date notation + if("SI_DAY" %in% unique(c(by))){ + eflalo$SI_DAY <- yday(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) + tacsat$SI_DAY <- yday(tacsat$SI_DATIM) + } + if("SI_YEAR" %in% unique(c(by))){ + eflalo$SI_YEAR<- year(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) + tacsat$SI_YEAR<- year(tacsat$SI_DATIM) + } + if("SI_MONTH"%in% unique(c(by))){ + eflalo$SI_MONTH<- month(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) + tacsat$SI_MONTH<- month(tacsat$SI_DATIM) + } + if("SI_WEEK"%in% unique(c(by))){ + eflalo$SI_WEEK<- week(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) + tacsat$SI_WEEK<- week(tacsat$SI_DATIM) + } + if("SI_QUARTER"%in% unique(c(by))){ + eflalo$SI_QUARTER<- quarter(as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y")) + tacsat$SI_QUARTER<- quarter(tacsat$SI_DATIM) + } + + #- Select time levels + order these + timePos<- c("SI_DAY","SI_WEEK","SI_MONTH","SI_QUARTER","SI_YEAR") + byTime <- by[which(by %in% timePos)] + byTime <- timePos[which(timePos %in% byTime)] + + #----------------------------------------------------------------------------- + #-Add spatial location + 
#----------------------------------------------------------------------------- + if("LE_RECT" %in% by) + tacsat$LE_RECT <- ICESrectangle(tacsat) + if("LE_ICESAREA" %in% by){ + data(ICESareas) + tacsat$LE_AREA <- ICESarea(tacsat,ICESareas) + tacsat$LE_AREA[which(is.na(tacsat$LE_AREA)==T)] <- "OTHER" + eflonlat <- ICESrectangle2LonLat(eflalo$LE_RECT) + eflalo$LE_AREA <- ICESarea(eflonlat,ICESareas) + eflalo$LE_AREA[which(is.na(eflalo$LE_AREA)==T)] <- "OTHER" + } + + #- Select area levels + order these + areaPos<- c("LE_RECT","LE_ICESAREA") + byArea <- by[which(by %in% areaPos)] + byArea <- areaPos[which(areaPos %in% byArea)] + + #----------------------------------------------------------------------------- + #- Calculate time possible to spend per day fishing + #----------------------------------------------------------------------------- + eflalo$LE_CDATIM <- as.POSIXct(eflalo$LE_CDAT,format="%d/%m/%Y",tz="GMT") + eflalo$INTV <- c(difftime(eflalo$FT_LDATIM,eflalo$FT_DDATIM,units="mins")) + eflalo$FT_DURDAY <- ifelse(c(difftime(as.Date(eflalo$FT_LDATIM),as.Date(eflalo$FT_DDATIM),units="hours") == 0), + c(difftime(eflalo$FT_LDATIM,eflalo$FT_DDATIM,units="mins")), + ifelse(c(difftime(as.Date(eflalo$FT_DDATIM),as.Date(eflalo$LE_CDATIM),units="hours")==0), + c(difftime(eflalo$LE_CDATIM+(60*60*24),eflalo$FT_DDATIM,units="mins")), + ifelse(c(difftime(as.Date(eflalo$FT_LDATIM),as.Date(eflalo$LE_CDATIM),units="hours")==0), + c(difftime(eflalo$FT_LDATIM,eflalo$LE_CDATIM,units="mins")), + 1440))) + # Here there is still a problem because INTVDAY is calculated for catch days only, so you miss some effort of a whole trip + eflalo$dummy <- 1 + eflalo <- merge(eflalo,aggregate(eflalo$dummy,by=list(eflalo$FT_REF,eflalo$LE_CDATIM),FUN=sum,na.rm=T),by.x=c("FT_REF","LE_CDATIM"),by.y=c("Group.1","Group.2"),all.x=T) + colnames(eflalo)[length(colnames(eflalo))] <- "NR_FT_REF" + if("SI_DAY" %in% by){ + eflalo$INTVDAY <- eflalo$FT_DURDAY / eflalo$NR_FT_REF + } else { + eflalo$INTVDAY <- 
eflalo$INTV / eflalo$NR_FT_REF + } + eflalo <- eflalo[,-grep("dummy",colnames(eflalo))] + eflalo <- eflalo[,-grep("FT_DURDAY",colnames(eflalo))] + eflalo <- eflalo[,-grep("NR_FT_REF",colnames(eflalo))] + + #----------------------------------------------------------------------------- + #- Check if all colums in both tacsat and eflalo are available + #----------------------------------------------------------------------------- + if(length(which(!by %in% colnames(tacsat) | !by %in% colnames(eflalo)))>0) + stop("elements specified in 'by' are not available as columns in either tacsat or eflalo") + + #- If estimated proportion of fishing column is missing + if(!"PropFish" %in% colnames(eflalo)){ + eflalo$PropFish <- 1 + } + + #- Start to aggregate and raise tacsat, but drop elements through a for loop + # First aggregation without dropping any element + tacsat$INTVR <- 0 + + cat("Start minutes eflalo",sum(eflalo$INTVDAY * eflalo$PropFish,na.rm=T),"\n") + cat("Start minutes tacsat",sum(tacsat$INTV,na.rm=T),"\n") + + #- Order elements by uniqueness + if(sortBy){ + if(length(by)>1) + by <- names(sort(apply(eflalo[,by],2,function(x){length(unique(x))}))) + } + + #- Drop element by element and merge + byOrig <- by + INTVR <- numeric(nrow(tacsat)) + INTVDAY <- eflalo$INTVDAY + for(j in 0:(length(by)-1)){ + if(j != 0) + by <- rev(rev(byOrig)[-c(1:j)]) + unEf <- apply(unique(t(t(eflalo[,by]))),1,paste,collapse="_") + pEf <- apply(t(t(eflalo[,by])),1,paste,collapse="_") + pTa <- apply(t(t(tacsat[,by])),1,paste,collapse="_") + cat("Raising by",by,"\n") + for(i in 1:length(unEf)){ + idxT <- which(pTa %in% unEf[i]) + idxE <- which(pEf %in% unEf[i]) + if(length(idxT)>0 & length(idxE)>0){ + INTVR[idxT] <- sum(INTVDAY[idxE] * eflalo$PropFish[idxE],na.rm=T)/sum(tacsat$INTV[idxT],na.rm=T) * tacsat$INTV[idxT] + INTVR[idxT] + INTVDAY[idxE] <- 0 + } + } + } + tacsat$INTVR <- INTVR + eflalo$INTVDAY <- INTVDAY + + cat("Final minutes tacsat",sum(tacsat$INTVR,na.rm=T),"\n") + cat("final 
remaing minutes eflalo",sum(eflalo$INTVDAY,na.rm=T),"\n") + +return(tacsat)} + diff --git a/vmstools/R/rbindEflalo.r b/vmstools/R/rbindEflalo.r index c84af0f..c091e38 100644 --- a/vmstools/R/rbindEflalo.r +++ b/vmstools/R/rbindEflalo.r @@ -1,20 +1,41 @@ -rbindEflalo <- function(set1,set2){ - cln1 <- colnames(set1) - cln2 <- colnames(set2) - if(any(duplicated(cln1)==TRUE) || any(duplicated(cln2)==TRUE)) stop("Duplicate column names in datasets") - idx1 <- which(is.na(pmatch(cln1,cln2))==TRUE) - idx2 <- which(is.na(pmatch(cln2,cln1))==TRUE) - - if(length(idx1)>0){ - for(i in idx1) set2 <- cbind(set2,NA) - colnames(set2) <- c(cln2,cln1[idx1])} - if(length(idx2)>0){ - for(i in idx2) set1 <- cbind(set1,NA) - colnames(set1) <- c(cln1,cln2[idx2])} - cln1 <- colnames(set1) - cln2 <- colnames(set2) - mtch <- pmatch(cln1,cln2) - if(any(is.na(mtch))==TRUE) stop("Cannot find nor create all matching column names") - set3 <- rbind(set1,set2[,cln2[mtch]]) -return(set3)} - +#' Row bind two eflalo sets together +#' +#' Row binds two eflalo sets together while taking differences in column names +#' into account +#' +#' +#' @param set1 Eflalo set 1 +#' @param set2 Eflalo set 2 +#' @author Niels T. 
Hintzen +#' @seealso \code{\link{rbindTacsat}}, \code{\link{do.call}} +#' @references EU Lot 2 project +#' @examples +#' +#' data(eflalo) +#' set1 <- eflalo +#' set2 <- eflalo[seq(1,100,5),] +#' +#' combined <- rbindEflalo(set1,set2) +#' +#' +#' @export rbindEflalo +rbindEflalo <- function(set1,set2){ + cln1 <- colnames(set1) + cln2 <- colnames(set2) + if(any(duplicated(cln1)==TRUE) || any(duplicated(cln2)==TRUE)) stop("Duplicate column names in datasets") + idx1 <- which(is.na(pmatch(cln1,cln2))==TRUE) + idx2 <- which(is.na(pmatch(cln2,cln1))==TRUE) + + if(length(idx1)>0){ + for(i in idx1) set2 <- cbind(set2,NA) + colnames(set2) <- c(cln2,cln1[idx1])} + if(length(idx2)>0){ + for(i in idx2) set1 <- cbind(set1,NA) + colnames(set1) <- c(cln1,cln2[idx2])} + cln1 <- colnames(set1) + cln2 <- colnames(set2) + mtch <- pmatch(cln1,cln2) + if(any(is.na(mtch))==TRUE) stop("Cannot find nor create all matching column names") + set3 <- rbind(set1,set2[,cln2[mtch]]) +return(set3)} + diff --git a/vmstools/R/rbindTacsat.r b/vmstools/R/rbindTacsat.r index 5310e4a..c19e5c3 100644 --- a/vmstools/R/rbindTacsat.r +++ b/vmstools/R/rbindTacsat.r @@ -1,23 +1,44 @@ -rbindTacsat <- function(set1,set2){ - cln1 <- colnames(set1) - cln2 <- colnames(set2) - if(any(duplicated(cln1)==TRUE) || any(duplicated(cln2)==TRUE)) stop("Duplicate column names in datasets") - idx1 <- which(is.na(pmatch(cln1,cln2))==TRUE) - idx2 <- which(is.na(pmatch(cln2,cln1))==TRUE) - - if(length(idx1)>0){ - for(i in idx1) set2 <- cbind(set2,NA) - colnames(set2) <- c(cln2,cln1[idx1])} - if(length(idx2)>0){ - for(i in idx2) set1 <- cbind(set1,NA) - colnames(set1) <- c(cln1,cln2[idx2])} - cln1 <- colnames(set1) - cln2 <- colnames(set2) - mtch <- pmatch(cln1,cln2) - if(any(is.na(mtch))==TRUE) stop("Cannot find nor create all matching column names") - set3 <- rbind(set1,set2[,cln2[mtch]]) -return(set3)} - - - - +#' Row bind two tacsat sets together +#' +#' Row binds two tacsat sets together while taking differences in column 
names +#' into account +#' +#' +#' @param set1 Tacsat set 1 +#' @param set2 Tacsat set 2 +#' @author Niels T. Hintzen +#' @seealso \code{\link{rbindEflalo}}, \code{\link{do.call}} +#' @references EU Lot 2 project +#' @examples +#' +#' data(tacsat) +#' set1 <- tacsat +#' set2 <- tacsat[seq(1,100,5),] +#' +#' combined <- rbindTacsat(set1,set2) +#' +#' +#' @export rbindTacsat +rbindTacsat <- function(set1,set2){ + cln1 <- colnames(set1) + cln2 <- colnames(set2) + if(any(duplicated(cln1)==TRUE) || any(duplicated(cln2)==TRUE)) stop("Duplicate column names in datasets") + idx1 <- which(is.na(pmatch(cln1,cln2))==TRUE) + idx2 <- which(is.na(pmatch(cln2,cln1))==TRUE) + + if(length(idx1)>0){ + for(i in idx1) set2 <- cbind(set2,NA) + colnames(set2) <- c(cln2,cln1[idx1])} + if(length(idx2)>0){ + for(i in idx2) set1 <- cbind(set1,NA) + colnames(set1) <- c(cln1,cln2[idx2])} + cln1 <- colnames(set1) + cln2 <- colnames(set2) + mtch <- pmatch(cln1,cln2) + if(any(is.na(mtch))==TRUE) stop("Cannot find nor create all matching column names") + set3 <- rbind(set1,set2[,cln2[mtch]]) +return(set3)} + + + + diff --git a/vmstools/R/readEflalo.r b/vmstools/R/readEflalo.r index 03a15e3..742e86e 100644 --- a/vmstools/R/readEflalo.r +++ b/vmstools/R/readEflalo.r @@ -1,20 +1,47 @@ -readEflalo <- function(file,sep=",",dec="."){ - - #Read the data - res <- read.table(file, header = TRUE,sep,dec = ".",stringsAsFactors = FALSE) - - #Perform checks - if(any(!c("VE_REF","VE_FLT","VE_COU","VE_LEN","VE_KW","VE_TON","FT_REF","FT_DCOU","FT_DHAR","FT_DDAT","FT_DTIME", - "FT_LCOU","FT_LHAR","FT_LDAT","FT_LTIME","LE_ID","LE_CDAT","LE_STIME","LE_ETIME","LE_SLAT","LE_SLON", - "LE_ELON","LE_ELON","LE_GEAR","LE_MSZ") %in% colnames(res))) - stop(paste(file,"needs correct header including",paste(c("VE_REF","VE_FLT","VE_COU","VE_LEN","VE_KW","VE_TON","FT_REF","FT_DCOU","FT_DHAR","FT_DDAT","FT_DTIME", - "FT_LCOU","FT_LHAR","FT_LDAT","FT_LTIME","LE_ID","LE_CDAT","LE_STIME","LE_ETIME","LE_SLAT","LE_SLON", - 
"LE_ELON","LE_ELON","LE_GEAR","LE_MSZ")))) - if(length(grep("KG",colnames(res))) < 1) stop("Units of landings are not in KG") - if(length(grep("LE_KG",colnames(res))) < 1) stop("No landing data provided") - if(length(grep("LE_EURO",colnames(res))) < 1) warning("Currency used is different than 'EURO'") - - #Reformat the data - res <- formatEflalo(res) - - return(res)} \ No newline at end of file +#' Read Eflalo data into R +#' +#' Reading eflalo data from delimited file into R, checking for obligatory +#' columns and formatting all data columns. +#' +#' +#' @param file file path + file name +#' @param sep delimiter used in file (default to ',') +#' @param dec decimal notation used in file (default to '.') +#' @return Returns the formatted Eflalo dataset +#' @author Niels T. Hintzen +#' @seealso \code{\link{readTacsat}} +#' @references EU lot 2 project +#' @examples +#' +#' data(eflalo) +#' dir.create("C:/tmpEflalo") +#' +#' #temporarily write tacsat file to disk to thereafter read it back in again +#' write.table(eflalo, file = "C:/tmpEflalo/eflalo.csv", quote = TRUE, sep = ",", +#' eol = "\n", na = "NA", dec = ".", row.names = FALSE,col.names = TRUE) +#' +#' #Read in tacsat file +#' eflalo <- readEflalo("C:/tmpEflalo/eflalo.csv") +#' +#' +#' @export readEflalo +readEflalo <- function(file,sep=",",dec="."){ + + #Read the data + res <- read.table(file, header = TRUE,sep,dec = ".",stringsAsFactors = FALSE) + + #Perform checks + if(any(!c("VE_REF","VE_FLT","VE_COU","VE_LEN","VE_KW","VE_TON","FT_REF","FT_DCOU","FT_DHAR","FT_DDAT","FT_DTIME", + "FT_LCOU","FT_LHAR","FT_LDAT","FT_LTIME","LE_ID","LE_CDAT","LE_STIME","LE_ETIME","LE_SLAT","LE_SLON", + "LE_ELON","LE_ELON","LE_GEAR","LE_MSZ") %in% colnames(res))) + stop(paste(file,"needs correct header including",paste(c("VE_REF","VE_FLT","VE_COU","VE_LEN","VE_KW","VE_TON","FT_REF","FT_DCOU","FT_DHAR","FT_DDAT","FT_DTIME", + "FT_LCOU","FT_LHAR","FT_LDAT","FT_LTIME","LE_ID","LE_CDAT","LE_STIME","LE_ETIME","LE_SLAT","LE_SLON", + 
"LE_ELON","LE_ELON","LE_GEAR","LE_MSZ")))) + if(length(grep("KG",colnames(res))) < 1) stop("Units of landings are not in KG") + if(length(grep("LE_KG",colnames(res))) < 1) stop("No landing data provided") + if(length(grep("LE_EURO",colnames(res))) < 1) warning("Currency used is different than 'EURO'") + + #Reformat the data + res <- formatEflalo(res) + + return(res)} diff --git a/vmstools/R/readTacsat.r b/vmstools/R/readTacsat.r index 9c1f18d..44384ca 100644 --- a/vmstools/R/readTacsat.r +++ b/vmstools/R/readTacsat.r @@ -1,14 +1,45 @@ -readTacsat <- function(file,sep=",",dec="."){ - - #Read the data - res <- read.table(file, header = TRUE,sep,dec = ".",stringsAsFactors = FALSE) - - #Perform checks - if(any(!c("VE_REF","SI_LATI","SI_LONG","SI_DATE","SI_TIME","SI_SP","SI_HE") %in% colnames(res))) stop(paste(file,"needs correct header including",paste("VE_REF","SI_LATI","SI_LONG","SI_DATE","SI_TIME","SI_SP","SI_HE"))) - if(any(res$SI_LATI > 90 | res$SI_LATI < -90,na.rm=TRUE) | any(res$SI_LONG > 180 | res$SI_LONG < -180,na.rm=TRUE)) stop("Longitudes or latitudes are out of range") - if(any(res$SI_HE > 360 | res$SI_HE < 0,na.rm=TRUE)) stop("Heading out of range, must be between 0 - 360") - - #Reformat the data - res <- formatTacsat(res) - - return(res)} \ No newline at end of file +#' Read Tacsat data into R +#' +#' Reading tacsat data from delimited file into R, checking for obligatory +#' columns and formatting all data columns. +#' +#' +#' @param file file path + file name +#' @param sep delimiter used in file (default to ',') +#' @param dec decimal notation used in file (default to '.') +#' @return Returns the formatted Tacsat dataset +#' @author Niels T. 
Hintzen +#' @seealso \code{readEflalo()} +#' @references EU lot 2 project +#' @examples +#' +#' data(tacsat) +#' dir.create("C:/tmpTacsat") +#' +#' #temporarily write tacsat file to disk to thereafter read it back in again +#' tacsat$SI_HE[which(tacsat$SI_HE>360 | tacsat$SI_HE < 0)] <- NA +#' tacsat$SI_LONG[which(tacsat$SI_LONG < -180 | tacsat$SI_LONG > 180)] <- NA +#' tacsat$SI_LATI[which(tacsat$SI_LATI < -90 | tacsat$SI_LATI > 90)] <- NA +#' write.table(tacsat, file = "C:/tmpTacsat/tacsat.csv", quote = TRUE, sep = ",", +#' eol = "\n", na = "NA", dec = ".", row.names = FALSE,col.names = TRUE) +#' +#' #Read in tacsat file +#' tacsat <- readTacsat("C:/tmpTacsat/tacsat.csv") +#' +#' +#' +#' @export readTacsat +readTacsat <- function(file,sep=",",dec="."){ + + #Read the data + res <- read.table(file, header = TRUE,sep,dec = ".",stringsAsFactors = FALSE) + + #Perform checks + if(any(!c("VE_REF","SI_LATI","SI_LONG","SI_DATE","SI_TIME","SI_SP","SI_HE") %in% colnames(res))) stop(paste(file,"needs correct header including",paste("VE_REF","SI_LATI","SI_LONG","SI_DATE","SI_TIME","SI_SP","SI_HE"))) + if(any(res$SI_LATI > 90 | res$SI_LATI < -90,na.rm=TRUE) | any(res$SI_LONG > 180 | res$SI_LONG < -180,na.rm=TRUE)) stop("Longitudes or latitudes are out of range") + if(any(res$SI_HE > 360 | res$SI_HE < 0,na.rm=TRUE)) stop("Heading out of range, must be between 0 - 360") + + #Reformat the data + res <- formatTacsat(res) + + return(res)} diff --git a/vmstools/R/scree.r b/vmstools/R/scree.r index 95d4fee..afc146d 100644 --- a/vmstools/R/scree.r +++ b/vmstools/R/scree.r @@ -1,17 +1,45 @@ -################################### -# Implementation of "scree-test" # -################################### - -scree=function(eig){ - n=length(eig) - delta=numeric(n) - epsilon=numeric(n) - delta[2]=eig[2]-eig[1] - for (i in 3:n){ - delta[i]=eig[i]-eig[i-1] - epsilon[i]=delta[i]-delta[i-1] - } - data=matrix(0,nrow=n,ncol=3) - data=cbind(valeurs_propres=eig, delta=delta, epsilon=epsilon) - 
return(data) -} +################################### +# Implementation of "scree-test" # +################################### + + + +#' Useful functions for the multivariate analysis of logbooks data for +#' identifying metiers. +#' +#' This function contains several functions needed for the multivariate +#' analysis of logbooks data for identifying metiers. +#' +#' +#' @param transformation_proportion Transform quantities to percentage values +#' (between 0 and 100) of each species in the logevent total catch. +#' @param table_variables Transpose the dataset (change variables into +#' individuals) +#' @param scree Implementation of "scree-test" +#' @param select_species Remove the cluster with the smallest mean of capture +#' @param building_tab_pca Build the table with the main species +#' @param test.values Compute the test-value for each species by cluster +#' @param targetspecies Determine the species with a test-value > 1.96 by +#' cluster +#' @param withinVar Calculate the cluster's within-variance +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @references Development of tools for logbook and VMS data analysis. 
Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @export scree +scree=function(eig){ + n=length(eig) + delta=numeric(n) + epsilon=numeric(n) + delta[2]=eig[2]-eig[1] + for (i in 3:n){ + delta[i]=eig[i]-eig[i-1] + epsilon[i]=delta[i]-delta[i-1] + } + data=matrix(0,nrow=n,ncol=3) + data=cbind(valeurs_propres=eig, delta=delta, epsilon=epsilon) + return(data) +} diff --git a/vmstools/R/segmentTacsatSpeed.r b/vmstools/R/segmentTacsatSpeed.r index 6c0850b..e25a8c8 100644 --- a/vmstools/R/segmentTacsatSpeed.r +++ b/vmstools/R/segmentTacsatSpeed.r @@ -1,417 +1,488 @@ -# do the distinction between fishing and non-fishing -# by automatic detection of the fishing peak -# adapted from library 'segmented' -# F. Bastardie - -segmentTacsatSpeed <- function(tacsat, - vessels= unique(tacsat$VE_REF), - force.lower.bound=0.5, - gears.to.force= c('GNS','GND','GNC','GNF','GTR','GTN','GEN','GN','SDN','SSC'), - general=list( - output.path=file.path('C:','output'), - visual.check=TRUE, - a.year=2009, - what.speed="calculated", - ), - ...){ - - - lstargs <- as.list( sys.call() ) # deprecated - - # checks - if(!'SI_STATE' %in% colnames(tacsat)) tacsat$SI_STATE <- NA - if(!'idx' %in% colnames(tacsat)) tacsat$idx <- 1:nrow(tacsat) - cat("if it still doesn't exist, 'results' folder is created in ",general$output.path,"\n") - dir.create(general$output.path, showWarnings = TRUE, recursive = TRUE, mode = "0777") - - # add - tacsat$bound1 <- NA - tacsat$bound2 <- NA - - - # utils--- - distAB.f <- function(A,B, .unit="km"){ - # return the dist in km or nautical miles. - # coord input: e.g. 
A[54,10] means [54 N, 10 E] - # formula = cos (gamma) = sin LatA sin LatB + cos LatA cos LatB cos delta - # if gamma in degree, knowing that 1 degree = 60 minutes = 60 nm, then gamma in degree * 60 = dist in nm - # we can also use the radius R of the earth: the length of the arc AB is given by R*gamma with gamma in radians - p = 180/pi; Rearth = 6378.388 # in km - nm <- 1852e-3 # 1852 m for 1 nm - res <- switch(.unit, - km = Rearth * acos(sin(A[,1]/p)*sin(B[,1]/p) + (cos(A[,1]/p) * cos(B[,1]/p)*cos(A[,2]/p - B[,2]/p))), - nm = Rearth * acos(sin(A[,1]/p)*sin(B[,1]/p) + (cos(A[,1]/p) * cos(B[,1]/p)*cos(A[,2]/p - B[,2]/p))) / nm - ) - names(res) <- NULL - return(res) - } - - # utils--- - # library(segmented) - # from library(segmented) # getS3method("segmented", "lm") - ## CAUTION: UNFORTUNATELY, THE FUNCTION NEED TO BE PLACED HERE BECAUSE A PROBLEM OF ENV - seg.control <- function (toll = 1e-04, it.max = 20, display = FALSE, last = TRUE, - maxit.glm = 25, h = 1){ - list(toll = toll, it.max = it.max, visual = display, last = last, - maxit.glm = maxit.glm, h = h) - } - segmented <- function (obj, seg.Z, psi, control = seg.control(), model.frame = TRUE, - ...){ - it.max <- old.it.max <- control$it.max - toll <- control$toll - visual <- control$visual - last <- control$last - h <- min(abs(control$h), 1) - if (h < 1) - it.max <- it.max + round(it.max/2) - objframe <- update(obj, model = TRUE, x = TRUE, y = TRUE) - y <- objframe$y - a <- model.matrix(seg.Z, data = eval(obj$call$data)) - a <- subset(a, select = colnames(a)[-1]) - n <- nrow(a) - Z <- list() - for (i in colnames(a)) Z[[length(Z) + 1]] <- a[, i] - name.Z <- names(Z) <- colnames(a) - if (length(Z) == 1 && is.vector(psi) && is.numeric(psi)) { - psi <- list(as.numeric(psi)) - names(psi) <- name.Z - } - if (!is.list(Z) || !is.list(psi) || is.null(names(Z)) || - is.null(names(psi))) - stop("Z and psi have to be *named* list") - nomiZpsi <- match(names(Z), names(psi)) - if (!identical(length(Z), length(psi)) || 
any(is.na(nomiZpsi))) - stop("Length or names of Z and psi do not match") - dd <- match(names(Z), names(psi)) - nome <- names(psi)[dd] - psi <- psi[nome] - a <- sapply(psi, length) - b <- rep(1:length(a), times = a) - Znew <- list() - for (i in 1:length(psi)) Znew[[length(Znew) + 1]] <- rep(Z[i], - a[i]) - Z <- matrix(unlist(Znew), nrow = n) - colnames(Z) <- rep(nome, a) - psi <- unlist(psi) - k <- ncol(Z) - PSI <- matrix(rep(psi, rep(n, k)), ncol = k) - nomiZ <- rep(nome, times = a) - ripetizioni <- as.numeric(unlist(sapply(table(nomiZ)[order(unique(nomiZ))], - function(xxx) { - 1:xxx - }))) - nomiU <- paste("U", ripetizioni, sep = "") - nomiU <- paste(nomiU, nomiZ, sep = ".") - KK <- new.env() - for (i in 1:ncol(objframe$model)) assign(names(objframe$model[i]), - objframe$model[[i]], envir = KK) - if (it.max == 0) { - U <- pmax((Z - PSI), 0) - colnames(U) <- paste(ripetizioni, nomiZ, sep = ".") - nomiU <- paste("U", colnames(U), sep = "") - for (i in 1:ncol(U)) assign(nomiU[i], U[, i], envir = KK) - Fo <- update.formula(formula(obj), as.formula(paste(".~.+", - paste(nomiU, collapse = "+")))) - obj <- update(obj, formula = Fo, data = KK) - if (model.frame) - obj$model <- data.frame(as.list(KK)) - obj$psi <- psi - return(obj) - } - XREG <- model.matrix(obj) - o <- model.offset(objframe) - w <- model.weights(objframe) - if (is.null(w)) - w <- rep(1, n) - if (is.null(o)) - o <- rep(0, n) - initial <- psi - it <- 1 - epsilon <- 10 - obj0 <- obj - list.obj <- list(obj) - psi.values <- NULL - rangeZ <- apply(Z, 2, range) - H <- 1 - while (abs(epsilon) > toll) { - U <- pmax((Z - PSI), 0) - V <- ifelse((Z > PSI), -1, 0) - dev.old <- sum(obj$residuals^2) - - X <- cbind(XREG, U, V) - rownames(X) <- NULL - if (ncol(V) == 1) - colnames(X)[(ncol(XREG) + 1):ncol(X)] <- c("U", "V") - else colnames(X)[(ncol(XREG) + 1):ncol(X)] <- c(paste("U", - 1:k, sep = ""), paste("V", 1:k, sep = "")) - obj <- lm.wfit(x = X, y = y, w = w, offset = o) - dev.new <- sum(obj$residuals^2) - if 
(visual) { - if (it == 1) - cat(0, " ", formatC(dev.old, 3, format = "f"), - "", "(No breakpoint(s))", "\n") - spp <- if (it < 10) - "" - else NULL - cat(it, spp, "", formatC(dev.new, 3, format = "f"), - "\n") - } - epsilon <- (dev.new - dev.old)/dev.old - obj$epsilon <- epsilon - it <- it + 1 - obj$it <- it - class(obj) <- c("segmented", class(obj)) - list.obj[[length(list.obj) + ifelse(last == TRUE, 0, - 1)]] <- obj - if (k == 1) { - beta.c <- coef(obj)["U"] - gamma.c <- coef(obj)["V"] - } - else { - beta.c <- coef(obj)[paste("U", 1:k, sep = "")] - gamma.c <- coef(obj)[paste("V", 1:k, sep = "")] - } - if (it > it.max) - break - psi.values[[length(psi.values) + 1]] <- psi.old <- psi - if (it >= old.it.max && h < 1) - H <- h - psi <- psi.old + H * gamma.c/beta.c - PSI <- matrix(rep(psi, rep(nrow(Z), ncol(Z))), ncol = ncol(Z)) - a <- apply((Z < PSI), 2, all) - b <- apply((Z > PSI), 2, all) - if (sum(a + b) != 0 || is.na(sum(a + b))) - stop("(Some) estimated psi out of its range") - obj$psi <- psi - } - psi.values[[length(psi.values) + 1]] <- psi - id.warn <- FALSE - if (it > it.max) { - warning("max number of iterations attained", call. 
= FALSE) - id.warn <- TRUE - } - Vxb <- V %*% diag(beta.c, ncol = length(beta.c)) - colnames(U) <- paste(ripetizioni, nomiZ, sep = ".") - colnames(Vxb) <- paste(ripetizioni, nomiZ, sep = ".") - nomiU <- paste("U", colnames(U), sep = "") - nomiVxb <- paste("psi", colnames(Vxb), sep = "") - for (i in 1:ncol(U)) { - assign(nomiU[i], U[, i], envir = KK) - assign(nomiVxb[i], Vxb[, i], envir = KK) - } - nnomi <- c(nomiU, nomiVxb) - Fo <- update.formula(formula(obj0), as.formula(paste(".~.+", - paste(nnomi, collapse = "+")))) - objF <- update(obj0, formula = Fo, data = KK) - Cov <- vcov(objF) - id <- match(paste("psi", colnames(Vxb), sep = ""), names(coef(objF))) - vv <- if (length(id) == 1) - Cov[id, id] - else diag(Cov[id, id]) - psi <- cbind(initial, psi, sqrt(vv)) - rownames(psi) <- colnames(Cov)[id] - colnames(psi) <- c("Initial", "Est.", "St.Err") - objF$rangeZ <- rangeZ - objF$psi.history <- psi.values - objF$psi <- psi - objF$it <- (it - 1) - objF$epsilon <- epsilon - objF$call <- match.call() - objF$nameUV <- list(U = paste("U", colnames(Vxb), sep = ""), - V = rownames(psi), Z = name.Z) - objF$id.warn <- id.warn - if (model.frame) - objF$mframe <- data.frame(as.list(KK)) - class(objF) <- c("segmented", class(obj0)) - list.obj[[length(list.obj) + 1]] <- objF - class(list.obj) <- "segmented" - if (last) - list.obj <- list.obj[[length(list.obj)]] - return(list.obj) - } - - - # hereafter, the core code... 
- # A FOR-LOOP BY VESSEL-------------------- - for(a.vesselid in vessels){ - tacsat.this.vessel <- tacsat[tacsat$VE_REF %in% a.vesselid, ] - - if('SI_DATE' %in% colnames(tacsat.this.vessel)) - { - tacsat.this.vessel$SI_DATIM <- strptime( paste(tacsat.this.vessel$SI_DATE, tacsat.this.vessel$SI_TIME), - tz='GMT', "%e/%m/%Y %H:%M" ) - } else{ - if(!any(colnames(tacsat.this.vessel) %in% 'SI_DATIM')) stop('you need either to inform a SI_DATIM or a SI_DATE') - } - - if(is.null(general$speed)) general$speed <- "calculated" - if(general$speed=="calculated"){ - # add a apparent speed colunm (nautical miles per hour) - diff.time <- tacsat.this.vessel[-nrow(tacsat.this.vessel),"SI_DATIM"] - - tacsat.this.vessel[-1,"SI_DATIM"] - tacsat.this.vessel$diff.time.mins <- c(0, as.numeric(diff.time, units="mins")) - tacsat.this.vessel$speed <- signif( - c(0, distAB.f(A= tacsat.this.vessel[-nrow(tacsat.this.vessel),c("SI_LATI","SI_LONG")], - B= tacsat.this.vessel[-1,c("SI_LATI","SI_LONG")], .unit="nm") / (abs(tacsat.this.vessel[-1,]$diff.time.mins)/60)), - 3) - } - if(general$speed=="observed"){ - tacsat.this.vessel$speed <- tacsat.this.vessel$SI_SP # instantaneous speed - } - - # cleaning irrealistic points - tacsat.this.vessel$speed <- - replace(tacsat.this.vessel$speed, is.na(tacsat.this.vessel$speed), 0) - - - idx <- tacsat.this.vessel[tacsat.this.vessel$speed >= 30 | - is.infinite(tacsat.this.vessel$speed),"idx"] - tacsat <- tacsat[!tacsat$idx %in% idx,] # just remove! 
- tacsat.this.vessel <- tacsat.this.vessel[tacsat.this.vessel$speed < 30,] - - # check - if(is.null(levels(factor(tacsat.this.vessel$LE_GEAR)))) - stop('you need first to assign a gear LE_GEAR to each ping') - - - # A FOR-LOOP BY GEAR-------------------- - for (gr in levels(factor(tacsat.this.vessel$LE_GEAR))){ - - xxx <- tacsat.this.vessel[tacsat.this.vessel$LE_GEAR==gr,] # input - - x <- as.numeric(as.character(sort(xxx$speed))) *100 # multiply by factor 100 because integer needed - hi <- hist(x, nclass=30,plot=FALSE) - - y <- c(1:length(sort(xxx$speed))) # sort in increasing order - y <- y[x>100 & x<1000] # just removing the 0, and the larger speeds we 100% know it is steaming - x <- x[x>100 & x<1000] - dati <- data.frame(x=x,y=y) - dati$x <- as.integer(dati$x) # integer needed - psi <- list(x= quantile(dati$x,probs=c(0.05,0.5)) ) - assign('dati', dati, envir=.GlobalEnv) # DEBUG segmented()...this function looks in the global env to get dati!! - # get good start guesses - hi$counts <- hi$counts[-1] - idx <- which(hi$counts==max(hi$counts))[1] - more.frequent.speed <- hi$mids[idx] # assumed to be for fishing - - a.flag <- FALSE - if(is.na(more.frequent.speed) | length(x)==0){ a.flag <- TRUE - } else { - while(more.frequent.speed > 700 || more.frequent.speed < 100){ - hi$counts <- hi$counts[-idx] - hi$mids <- hi$mids[-idx] - idx <- which(hi$counts==max(hi$counts))[1] - more.frequent.speed <- hi$mids[idx] - if(is.na(more.frequent.speed)){ - #=> for very few cases we have no speed found within 100-700 e.g. 
for the UNK gear - a.flag <- TRUE ; break - } - } - } - if(!a.flag){ - start.lower.bound <- ifelse(more.frequent.speed-200<= min(dati$x), - min(dati$x)+100, more.frequent.speed-200) - start.upper.bound <- ifelse(more.frequent.speed+200>= max(dati$x), - max(dati$x)-100, more.frequent.speed+200) - psi <- list(x= c(start.lower.bound, start.upper.bound) ) - psi$x[1] <- dati$x [dati$x<=psi$x[1]] [length(dati$x [dati$x<=psi$x[1]])] # get the bound value of the start guess from dati$x - psi$x[2] <- dati$x [dati$x>psi$x[2]] [1] - o <- 1 ; class(o) <- "try-error" ; count <- 0 ; bound1 <-NULL ; bound2 <- NULL; - while(class(o)=="try-error"){ - count <- count+1 - o <- try( - segmented(lm(y~x, data=dati) , seg.Z=~x , psi=psi, control= seg.control(display = FALSE, it.max=50, h=1)), # with 2 starting guesses - silent=TRUE) # the second breakpoint is quite uncertain and could lead to failure so... - if(!"try-error" %in% class(o)) break else psi <- list(x=c(psi$x[1],psi$x[2]-20)) # searching decreasing by 20 each time - if(count>10) {bound1 <- start.lower.bound; bound2 <- start.upper.bound ; cat(paste("failure of the segmented regression for",a.vesselid,gr,"\n...")); break} - } - if(is.null(bound1)) bound1 <- o$psi[order(o$psi[,"Est."])[1],"Est."] -20 # -20 hard to justify... 
- if(is.null(bound2)) bound2 <- o$psi[order(o$psi[,"Est."])[2],"Est."] +20 - - if(general$visual.check){ - X11() - par(mfrow=c(2,1)) - if(!"try-error" %in% class(o)){ - plot(dati$x/100, o$fitted.values, type="l", - ylab="cumulative distrib.", xlab="Knots", - main=paste("segmented regression - ",a.vesselid)) - } else{plot(0,0,type="n")} - #plot(hi) - #points(dati$x,dati$y) - tmp <- as.numeric(as.character(sort(xxx$speed))) - his <- hist(tmp, nclass=100, main=paste(general$speed,"speed between consecutive points"), - xlab="apparent speed [knots]", plot=TRUE) - if(!is.null(bound1)) abline(v=bound1/100,col=2) - if(!is.null(bound2)) abline(v=bound2/100,col=2) - if(!is.null(bound1)) text(bound1/100, median(his$counts), signif(bound1,3), col=2, cex=2) - if(!is.null(bound2)) text(bound2/100, median(his$counts), signif(bound2,3), col=2, cex=2) - # save the panel plot - savePlot(filename = file.path(general$output.path, - paste(unique(a.vesselid),"-detected_speed_span_for_feffort-", general$a.year,"-",gr, sep="")), - type = c("wmf"), device = dev.cur(), restoreConsole = TRUE) - dev.off() - } - - # so, - bound1 <- bound1 / 100 # transform back - bound2 <- bound2 / 100 # transform back - xxx$speed <- replace(xxx$speed, is.na(xxx$speed), 0) # debug 0/0 - - - - # maybe you want to only keep the upper bound - # and then set the lower one for a particular gear? - if(gr %in% gears.to.force){ - bound1 <- force.lower.bound - cat(paste(gr," force lower bound at", force.lower.bound, "knots\n")) - } - - # then, assign... 
- xxx[xxx$speed < bound1, "SI_STATE"] <- 2 # steaming - xxx[xxx$speed >= bound1 & xxx$speed < bound2, "SI_STATE"] <- 1 # fishing - xxx[xxx$speed >= bound2 , "SI_STATE"] <- 2 # steaming - tacsat.this.vessel[tacsat.this.vessel$LE_GEAR==gr, "SI_STATE"] <- xxx$SI_STATE # output - tacsat.this.vessel[tacsat.this.vessel$LE_GEAR==gr,"bound1"] <- bound1 - tacsat.this.vessel[tacsat.this.vessel$LE_GEAR==gr,"bound2"] <- bound2 - cat(paste(gr," lower", general$speed,"speed bound:",round(bound1,1),"knots\n")) - cat(paste(gr," upper", general$speed,"speed bound:",round(bound2,1),"knots\n")) - } else{ - tacsat.this.vessel[tacsat.this.vessel$LE_GEAR==gr, "SI_STATE"] <- 2 - #=> end a.flag: in case of very few records for this gear... - } - } # end gr - - - # clean up by removing no longer used columns - tacsat.this.vessel <- tacsat.this.vessel[, !colnames(tacsat.this.vessel) %in% - c('speed','diff.time.mins')] - - tacsat[tacsat$VE_REF==a.vesselid,] <- tacsat.this.vessel # output - } # end of a.vesselid - - - # plot for speed bound distribution - tacsat2 <- tacsat[tacsat$VE_REF %in% vessels & !duplicated(tacsat[,c("VE_REF","LE_GEAR")]),] - b1 <- tapply(tacsat2$bound1, tacsat2$LE_GEAR, mean, na.rm=TRUE) - b2 <- tapply(tacsat2$bound2, tacsat2$LE_GEAR, mean, na.rm=TRUE) - cat(paste("lower speed bound mean:",round(b1,1),"knots\n")) - cat(paste("upper speed bound mean:",round(b2,1),"knots\n")) - if(general$visual.check){ - X11() - op <- par(no.readonly = TRUE) - par(mfrow=c(1,2)) - boxplot(bound1 ~ LE_GEAR, data=tacsat2, ylim=c(0,8)) - boxplot(bound2 ~ LE_GEAR, data=tacsat2, ylim=c(0,8)) - par(op) - dev.off() - } - # save - write.table(tacsat2[,c("VE_REF","LE_GEAR","bound1","bound2")], append=TRUE, - file = file.path(general$output.path, - paste("speed_bounds_per_vessel_per_gear_", general$a.year,".txt", sep="")), - quote=FALSE, col.names=FALSE, row.names=FALSE) - - -return(tacsat[tacsat$VE_REF %in% vessels, !colnames(tacsat) %in% c('bound1','bound2')]) -} - +# do the distinction between 
fishing and non-fishing +# by automatic detection of the fishing peak +# adapted from library 'segmented' +# F. Bastardie + + + +#' do the distinction between fishing/non-fishing detecting the fishing speed +#' peak +#' +#' do the distinction between fishing/non-fishing based on (semi)automatic +#' detection of the fishing speed peak to account for the skipper/vessel +#' effect. This is done per vessel and independently for each gear of the +#' vessel. A segmented regression is performed on the cumulative speed +#' distribution to identify the speed breakpoints bounding the fishing peak +#' i.e. the first peak. The method returns tacsat (a subset if vessels +#' informed) with the SI_STATE informed. +#' +#' A (quick and dirty) vessel and gear-specific method has been suggested by +#' Bastardie et al. 2010 for dividing the fishing positions from the +#' non-fishing positions (or 'steaming') based on vessel speed profile. Data +#' from trips with observers on board recording vessel movement with a high +#' spatial resolution (e.g. a position recorded every minute) demonstrated that +#' the vessel speed frequency histogram is usually bimodal for commercial +#' trawlers, where the first low speed peak likely corresponds to fishing +#' activity, while the upper peak corresponds to faster movements, e.g. +#' steaming between fishing grounds and harbours. Low speeds can also +#' correspond, for instance, to a regulated speed leaving a harbour. +#' +#' Speed boundaries were accordingly determined by applying a segmented +#' regression to the cumulative distribution of vessel speeds ('calculated' or +#' 'observed', see the argument of the function) as a practical way to +#' automatically detect break points encapsulating the first peak of the speed +#' frequency histogram. 
The computation is preferably done on calculated speed +#' instead of instantaneous (observed) speeds to avoid possible +#' non-representative records of speed (discrete records usually with large +#' time-span between). Peak detection is done for each vessel and for each of +#' its gear types to account for individual skipper behaviour and gear-specific +#' constraints on fishing activity. The present methodology is however not +#' well suited for the particular gillnet and seine activities because no peaks +#' are found from observations of speed histograms. For these fishing +#' activities, only the upper boundary of the automatic peak detection should +#' be retained, assuming that fishing starts at a given value in speed e.g. 0.5 +#' knots. +#' +#' @param tacsat tacsat with LE_GEAR informed for each ping +#' @param general a list of general settings e.g. the output path for saving +#' the plot +#' @param ... vessels, a vector of vessel identifiers VE_REF that can be found +#' in tacsat. force.lower.bound might also be used to fix the lower speed at +#' some value for gillnet and seine gear types +#' @author Francois Bastardie +#' @seealso \code{mergeEflalo2Pings} +#' @references Bastardie et al. 2010. 
Fisheries Research +#' @examples +#' +#' +#' \dontrun{ +#' data(tacsat) +#' # fake here, but an informed LE_GEAR is required +#' tacsat$LE_GEAR <-"OTB" +#' tacsat <- segmentTacsatSpeed ( +#' tacsat=tacsat, +#' vessels="35", +#' force.lower.bound= 0.5, +#' gears.to.force= c('GNS','GND','GNC','GNF','GTR','GTN','GEN','GN','SDN','SSC'), +#' general= +#' list( +#' output.path=file.path('C:','output'), +#' visual.check=TRUE, speed="calculated" +#' ) +#' ) +#' } +#' +#' +#' @export segmentTacsatSpeed +segmentTacsatSpeed <- function(tacsat, + vessels= unique(tacsat$VE_REF), + force.lower.bound=0.5, + gears.to.force= c('GNS','GND','GNC','GNF','GTR','GTN','GEN','GN','SDN','SSC'), + general=list( + output.path=file.path('C:','output'), + visual.check=TRUE, + a.year=2009, + speed="calculated" + ), + ...){ + + + lstargs <- as.list( sys.call() ) # deprecated + + # checks + if(!'SI_STATE' %in% colnames(tacsat)) tacsat$SI_STATE <- NA + if(!'idx' %in% colnames(tacsat)) tacsat$idx <- 1:nrow(tacsat) + cat("if it still doesn't exist, 'results' folder is created in ",general$output.path,"\n") + dir.create(general$output.path, showWarnings = TRUE, recursive = TRUE, mode = "0777") + + # add + tacsat$bound1 <- NA + tacsat$bound2 <- NA + + + # utils--- + distAB.f <- function(A,B, .unit="km"){ + # return the dist in km or nautical miles. + # coord input: e.g. 
A[54,10] means [54 N, 10 E] + # formula = cos (gamma) = sin LatA sin LatB + cos LatA cos LatB cos delta + # if gamma in degree, knowing that 1 degree = 60 minutes = 60 nm, then gamma in degree * 60 = dist in nm + # we can also use the radius R of the earth: the length of the arc AB is given by R*gamma with gamma in radians + p = 180/pi; Rearth = 6378.388 # in km + nm <- 1852e-3 # 1852 m for 1 nm + res <- switch(.unit, + km = Rearth * acos(sin(A[,1]/p)*sin(B[,1]/p) + (cos(A[,1]/p) * cos(B[,1]/p)*cos(A[,2]/p - B[,2]/p))), + nm = Rearth * acos(sin(A[,1]/p)*sin(B[,1]/p) + (cos(A[,1]/p) * cos(B[,1]/p)*cos(A[,2]/p - B[,2]/p))) / nm + ) + names(res) <- NULL + return(res) + } + + # utils--- + # library(segmented) + # from library(segmented) # getS3method("segmented", "lm") + ## CAUTION: UNFORTUNATELY, THE FUNCTION NEED TO BE PLACED HERE BECAUSE A PROBLEM OF ENV + seg.control <- function (toll = 1e-04, it.max = 20, display = FALSE, last = TRUE, + maxit.glm = 25, h = 1){ + list(toll = toll, it.max = it.max, visual = display, last = last, + maxit.glm = maxit.glm, h = h) + } + segmented <- function (obj, seg.Z, psi, control = seg.control(), model.frame = TRUE, + ...){ + it.max <- old.it.max <- control$it.max + toll <- control$toll + visual <- control$visual + last <- control$last + h <- min(abs(control$h), 1) + if (h < 1) + it.max <- it.max + round(it.max/2) + objframe <- update(obj, model = TRUE, x = TRUE, y = TRUE) + y <- objframe$y + a <- model.matrix(seg.Z, data = eval(obj$call$data)) + a <- subset(a, select = colnames(a)[-1]) + n <- nrow(a) + Z <- list() + for (i in colnames(a)) Z[[length(Z) + 1]] <- a[, i] + name.Z <- names(Z) <- colnames(a) + if (length(Z) == 1 && is.vector(psi) && is.numeric(psi)) { + psi <- list(as.numeric(psi)) + names(psi) <- name.Z + } + if (!is.list(Z) || !is.list(psi) || is.null(names(Z)) || + is.null(names(psi))) + stop("Z and psi have to be *named* list") + nomiZpsi <- match(names(Z), names(psi)) + if (!identical(length(Z), length(psi)) || 
any(is.na(nomiZpsi))) + stop("Length or names of Z and psi do not match") + dd <- match(names(Z), names(psi)) + nome <- names(psi)[dd] + psi <- psi[nome] + a <- sapply(psi, length) + b <- rep(1:length(a), times = a) + Znew <- list() + for (i in 1:length(psi)) Znew[[length(Znew) + 1]] <- rep(Z[i], + a[i]) + Z <- matrix(unlist(Znew), nrow = n) + colnames(Z) <- rep(nome, a) + psi <- unlist(psi) + k <- ncol(Z) + PSI <- matrix(rep(psi, rep(n, k)), ncol = k) + nomiZ <- rep(nome, times = a) + ripetizioni <- as.numeric(unlist(sapply(table(nomiZ)[order(unique(nomiZ))], + function(xxx) { + 1:xxx + }))) + nomiU <- paste("U", ripetizioni, sep = "") + nomiU <- paste(nomiU, nomiZ, sep = ".") + KK <- new.env() + for (i in 1:ncol(objframe$model)) assign(names(objframe$model[i]), + objframe$model[[i]], envir = KK) + if (it.max == 0) { + U <- pmax((Z - PSI), 0) + colnames(U) <- paste(ripetizioni, nomiZ, sep = ".") + nomiU <- paste("U", colnames(U), sep = "") + for (i in 1:ncol(U)) assign(nomiU[i], U[, i], envir = KK) + Fo <- update.formula(formula(obj), as.formula(paste(".~.+", + paste(nomiU, collapse = "+")))) + obj <- update(obj, formula = Fo, data = KK) + if (model.frame) + obj$model <- data.frame(as.list(KK)) + obj$psi <- psi + return(obj) + } + XREG <- model.matrix(obj) + o <- model.offset(objframe) + w <- model.weights(objframe) + if (is.null(w)) + w <- rep(1, n) + if (is.null(o)) + o <- rep(0, n) + initial <- psi + it <- 1 + epsilon <- 10 + obj0 <- obj + list.obj <- list(obj) + psi.values <- NULL + rangeZ <- apply(Z, 2, range) + H <- 1 + while (abs(epsilon) > toll) { + U <- pmax((Z - PSI), 0) + V <- ifelse((Z > PSI), -1, 0) + dev.old <- sum(obj$residuals^2) + + X <- cbind(XREG, U, V) + rownames(X) <- NULL + if (ncol(V) == 1) + colnames(X)[(ncol(XREG) + 1):ncol(X)] <- c("U", "V") + else colnames(X)[(ncol(XREG) + 1):ncol(X)] <- c(paste("U", + 1:k, sep = ""), paste("V", 1:k, sep = "")) + obj <- lm.wfit(x = X, y = y, w = w, offset = o) + dev.new <- sum(obj$residuals^2) + if 
(visual) { + if (it == 1) + cat(0, " ", formatC(dev.old, 3, format = "f"), + "", "(No breakpoint(s))", "\n") + spp <- if (it < 10) + "" + else NULL + cat(it, spp, "", formatC(dev.new, 3, format = "f"), + "\n") + } + epsilon <- (dev.new - dev.old)/dev.old + obj$epsilon <- epsilon + it <- it + 1 + obj$it <- it + class(obj) <- c("segmented", class(obj)) + list.obj[[length(list.obj) + ifelse(last == TRUE, 0, + 1)]] <- obj + if (k == 1) { + beta.c <- coef(obj)["U"] + gamma.c <- coef(obj)["V"] + } + else { + beta.c <- coef(obj)[paste("U", 1:k, sep = "")] + gamma.c <- coef(obj)[paste("V", 1:k, sep = "")] + } + if (it > it.max) + break + psi.values[[length(psi.values) + 1]] <- psi.old <- psi + if (it >= old.it.max && h < 1) + H <- h + psi <- psi.old + H * gamma.c/beta.c + PSI <- matrix(rep(psi, rep(nrow(Z), ncol(Z))), ncol = ncol(Z)) + a <- apply((Z < PSI), 2, all) + b <- apply((Z > PSI), 2, all) + if (sum(a + b) != 0 || is.na(sum(a + b))) + stop("(Some) estimated psi out of its range") + obj$psi <- psi + } + psi.values[[length(psi.values) + 1]] <- psi + id.warn <- FALSE + if (it > it.max) { + warning("max number of iterations attained", call. 
= FALSE) + id.warn <- TRUE + } + Vxb <- V %*% diag(beta.c, ncol = length(beta.c)) + colnames(U) <- paste(ripetizioni, nomiZ, sep = ".") + colnames(Vxb) <- paste(ripetizioni, nomiZ, sep = ".") + nomiU <- paste("U", colnames(U), sep = "") + nomiVxb <- paste("psi", colnames(Vxb), sep = "") + for (i in 1:ncol(U)) { + assign(nomiU[i], U[, i], envir = KK) + assign(nomiVxb[i], Vxb[, i], envir = KK) + } + nnomi <- c(nomiU, nomiVxb) + Fo <- update.formula(formula(obj0), as.formula(paste(".~.+", + paste(nnomi, collapse = "+")))) + objF <- update(obj0, formula = Fo, data = KK) + Cov <- vcov(objF) + id <- match(paste("psi", colnames(Vxb), sep = ""), names(coef(objF))) + vv <- if (length(id) == 1) + Cov[id, id] + else diag(Cov[id, id]) + psi <- cbind(initial, psi, sqrt(vv)) + rownames(psi) <- colnames(Cov)[id] + colnames(psi) <- c("Initial", "Est.", "St.Err") + objF$rangeZ <- rangeZ + objF$psi.history <- psi.values + objF$psi <- psi + objF$it <- (it - 1) + objF$epsilon <- epsilon + objF$call <- match.call() + objF$nameUV <- list(U = paste("U", colnames(Vxb), sep = ""), + V = rownames(psi), Z = name.Z) + objF$id.warn <- id.warn + if (model.frame) + objF$mframe <- data.frame(as.list(KK)) + class(objF) <- c("segmented", class(obj0)) + list.obj[[length(list.obj) + 1]] <- objF + class(list.obj) <- "segmented" + if (last) + list.obj <- list.obj[[length(list.obj)]] + return(list.obj) + } + + + # hereafter, the core code... 
+ # A FOR-LOOP BY VESSEL-------------------- + for(a.vesselid in vessels){ + tacsat.this.vessel <- tacsat[tacsat$VE_REF %in% a.vesselid, ] + + if('SI_DATE' %in% colnames(tacsat.this.vessel)) + { + tacsat.this.vessel$SI_DATIM <- strptime( paste(tacsat.this.vessel$SI_DATE, tacsat.this.vessel$SI_TIME), + tz='GMT', "%e/%m/%Y %H:%M" ) + } else{ + if(!any(colnames(tacsat.this.vessel) %in% 'SI_DATIM')) stop('you need either to inform a SI_DATIM or a SI_DATE') + } + + if(is.null(general$speed)) general$speed <- "calculated" + if(general$speed=="calculated"){ + # add an apparent speed column (nautical miles per hour) + diff.time <- tacsat.this.vessel[-nrow(tacsat.this.vessel),"SI_DATIM"] - + tacsat.this.vessel[-1,"SI_DATIM"] + tacsat.this.vessel$diff.time.mins <- c(0, as.numeric(diff.time, units="mins")) + tacsat.this.vessel$speed <- signif( + c(0, distAB.f(A= tacsat.this.vessel[-nrow(tacsat.this.vessel),c("SI_LATI","SI_LONG")], + B= tacsat.this.vessel[-1,c("SI_LATI","SI_LONG")], .unit="nm") / (abs(tacsat.this.vessel[-1,]$diff.time.mins)/60)), + 3) + } + if(general$speed=="observed"){ + tacsat.this.vessel$speed <- tacsat.this.vessel$SI_SP # instantaneous speed + } + + # cleaning unrealistic points + tacsat.this.vessel$speed <- + replace(tacsat.this.vessel$speed, is.na(tacsat.this.vessel$speed), 0) + + + idx <- tacsat.this.vessel[tacsat.this.vessel$speed >= 30 | + is.infinite(tacsat.this.vessel$speed),"idx"] + tacsat <- tacsat[!tacsat$idx %in% idx,] # just remove! 
+ tacsat.this.vessel <- tacsat.this.vessel[tacsat.this.vessel$speed < 30,] + + # check + if(is.null(levels(factor(tacsat.this.vessel$LE_GEAR)))) + stop('you need first to assign a gear LE_GEAR to each ping') + + + # A FOR-LOOP BY GEAR-------------------- + for (gr in levels(factor(tacsat.this.vessel$LE_GEAR))){ + + xxx <- tacsat.this.vessel[tacsat.this.vessel$LE_GEAR==gr,] # input + + x <- as.numeric(as.character(sort(xxx$speed))) *100 # multiply by factor 100 because integer needed + hi <- hist(x, nclass=30,plot=FALSE) + + y <- c(1:length(sort(xxx$speed))) # sort in increasing order + y <- y[x>100 & x<1000] # just removing the 0, and the larger speeds we 100% know it is steaming + x <- x[x>100 & x<1000] + dati <- data.frame(x=x,y=y) + dati$x <- as.integer(dati$x) # integer needed + psi <- list(x= quantile(dati$x,probs=c(0.05,0.5)) ) + assign('dati', dati, envir=.GlobalEnv) # DEBUG segmented()...this function looks in the global env to get dati!! + # get good start guesses + hi$counts <- hi$counts[-1] + idx <- which(hi$counts==max(hi$counts))[1] + more.frequent.speed <- hi$mids[idx] # assumed to be for fishing + + a.flag <- FALSE + if(is.na(more.frequent.speed) | length(x)==0){ a.flag <- TRUE + } else { + while(more.frequent.speed > 700 || more.frequent.speed < 100){ + hi$counts <- hi$counts[-idx] + hi$mids <- hi$mids[-idx] + idx <- which(hi$counts==max(hi$counts))[1] + more.frequent.speed <- hi$mids[idx] + if(is.na(more.frequent.speed)){ + #=> for very few cases we have no speed found within 100-700 e.g. 
for the UNK gear + a.flag <- TRUE ; break + } + } + } + if(!a.flag){ + start.lower.bound <- ifelse(more.frequent.speed-200<= min(dati$x), + min(dati$x)+100, more.frequent.speed-200) + start.upper.bound <- ifelse(more.frequent.speed+200>= max(dati$x), + max(dati$x)-100, more.frequent.speed+200) + psi <- list(x= c(start.lower.bound, start.upper.bound) ) + psi$x[1] <- dati$x [dati$x<=psi$x[1]] [length(dati$x [dati$x<=psi$x[1]])] # get the bound value of the start guess from dati$x + psi$x[2] <- dati$x [dati$x>psi$x[2]] [1] + o <- 1 ; class(o) <- "try-error" ; count <- 0 ; bound1 <-NULL ; bound2 <- NULL; + while(class(o)=="try-error"){ + count <- count+1 + o <- try( + segmented(lm(y~x, data=dati) , seg.Z=~x , psi=psi, control= seg.control(display = FALSE, it.max=50, h=1)), # with 2 starting guesses + silent=TRUE) # the second breakpoint is quite uncertain and could lead to failure so... + if(!"try-error" %in% class(o)) break else psi <- list(x=c(psi$x[1],psi$x[2]-20)) # searching decreasing by 20 each time + if(count>10) {bound1 <- start.lower.bound; bound2 <- start.upper.bound ; cat(paste("failure of the segmented regression for",a.vesselid,gr,"\n...")); break} + } + if(is.null(bound1)) bound1 <- o$psi[order(o$psi[,"Est."])[1],"Est."] -20 # -20 hard to justify... 
+ if(is.null(bound2)) bound2 <- o$psi[order(o$psi[,"Est."])[2],"Est."] +20 + + if(general$visual.check){ + X11() + par(mfrow=c(2,1)) + if(!"try-error" %in% class(o)){ + plot(dati$x/100, o$fitted.values, type="l", + ylab="cumulative distrib.", xlab="Knots", + main=paste("segmented regression - ",a.vesselid)) + } else{plot(0,0,type="n")} + #plot(hi) + #points(dati$x,dati$y) + tmp <- as.numeric(as.character(sort(xxx$speed))) + his <- hist(tmp, nclass=100, main=paste(general$speed,"speed between consecutive points"), + xlab="apparent speed [knots]", plot=TRUE) + if(!is.null(bound1)) abline(v=bound1/100,col=2) + if(!is.null(bound2)) abline(v=bound2/100,col=2) + if(!is.null(bound1)) text(bound1/100, median(his$counts), signif(bound1,3), col=2, cex=2) + if(!is.null(bound2)) text(bound2/100, median(his$counts), signif(bound2,3), col=2, cex=2) + # save the panel plot + savePlot(filename = file.path(general$output.path, + paste(unique(a.vesselid),"-detected_speed_span_for_feffort-", general$a.year,"-",gr, sep="")), + type = c("wmf"), device = dev.cur(), restoreConsole = TRUE) + dev.off() + } + + # so, + bound1 <- bound1 / 100 # transform back + bound2 <- bound2 / 100 # transform back + xxx$speed <- replace(xxx$speed, is.na(xxx$speed), 0) # debug 0/0 + + + + # maybe you want to only keep the upper bound + # and then set the lower one for a particular gear? + if(gr %in% gears.to.force){ + bound1 <- force.lower.bound + cat(paste(gr," force lower bound at", force.lower.bound, "knots\n")) + } + + # then, assign... 
+ xxx[xxx$speed < bound1, "SI_STATE"] <- 2 # steaming + xxx[xxx$speed >= bound1 & xxx$speed < bound2, "SI_STATE"] <- 1 # fishing + xxx[xxx$speed >= bound2 , "SI_STATE"] <- 2 # steaming + tacsat.this.vessel[tacsat.this.vessel$LE_GEAR==gr, "SI_STATE"] <- xxx$SI_STATE # output + tacsat.this.vessel[tacsat.this.vessel$LE_GEAR==gr,"bound1"] <- bound1 + tacsat.this.vessel[tacsat.this.vessel$LE_GEAR==gr,"bound2"] <- bound2 + cat(paste(gr," lower", general$speed,"speed bound:",round(bound1,1),"knots\n")) + cat(paste(gr," upper", general$speed,"speed bound:",round(bound2,1),"knots\n")) + } else{ + tacsat.this.vessel[tacsat.this.vessel$LE_GEAR==gr, "SI_STATE"] <- 2 + #=> end a.flag: in case of very few records for this gear... + } + } # end gr + + + # clean up by removing no longer used columns + tacsat.this.vessel <- tacsat.this.vessel[, !colnames(tacsat.this.vessel) %in% + c('speed','diff.time.mins')] + + tacsat[tacsat$VE_REF==a.vesselid,] <- tacsat.this.vessel # output + } # end of a.vesselid + + + # plot for speed bound distribution + tacsat2 <- tacsat[tacsat$VE_REF %in% vessels & !duplicated(tacsat[,c("VE_REF","LE_GEAR")]),] + b1 <- tapply(tacsat2$bound1, tacsat2$LE_GEAR, mean, na.rm=TRUE) + b2 <- tapply(tacsat2$bound2, tacsat2$LE_GEAR, mean, na.rm=TRUE) + cat(paste("lower speed bound mean:",round(b1,1),"knots\n")) + cat(paste("upper speed bound mean:",round(b2,1),"knots\n")) + if(general$visual.check){ + X11() + op <- par(no.readonly = TRUE) + par(mfrow=c(1,2)) + boxplot(bound1 ~ LE_GEAR, data=tacsat2, ylim=c(0,8)) + boxplot(bound2 ~ LE_GEAR, data=tacsat2, ylim=c(0,8)) + par(op) + dev.off() + } + # save + write.table(tacsat2[,c("VE_REF","LE_GEAR","bound1","bound2")], append=TRUE, + file = file.path(general$output.path, + paste("speed_bounds_per_vessel_per_gear_", general$a.year,".txt", sep="")), + quote=FALSE, col.names=FALSE, row.names=FALSE) + + +return(tacsat[tacsat$VE_REF %in% vessels, !colnames(tacsat) %in% c('bound1','bound2')]) +} + diff --git 
a/vmstools/R/segmentedTacsatSpeed.r b/vmstools/R/segmentedTacsatSpeed.r index 6ebde2e..aaa9312 100644 --- a/vmstools/R/segmentedTacsatSpeed.r +++ b/vmstools/R/segmentedTacsatSpeed.r @@ -1,141 +1,192 @@ -segmentedTacsatSpeed <- function(tacsat,units="year",analyse.by="VE_REF",speed="calculated",logfit=FALSE,CI=0.95,saveDir=tempdir(),forceLowerBound=NULL){ - - require(segmented) - tacsat$idxFun <- 1:nrow(tacsat) - - #Regular checks - if(!'SI_STATE' %in% colnames(tacsat)) tacsat$SI_STATE <- NA - if(!"SI_DATIM" %in% colnames(tacsat)) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") - if(analyse.by == "LE_GEAR"){ if(!"LE_GEAR" %in% colnames(tacsat)) stop("Provide gear type (as column 'LE_GEAR' and if unknown, provide it as 'MIS'")} - if(!analyse.by %in% c("LE_GEAR","VE_REF","VE_REF+LE_GEAR")) warning("Analysing by unknown column variable, please check!") - if(analyse.by == "VE_REF+LE_GEAR"){ - if(!"VE_REF" %in% colnames(tacsat) | !"LE_GEAR" %in% colnames(tacsat)) stop("VE_REF and LE_GEAR not both in tacsat available") - tacsat$VE_REF_LE_GEAR <- paste(tacsat$VE_REF,tacsat$LE_GEAR) - analyse.by <- "VE_REF_LE_GEAR" - } - - tacsatOrig <- tacsat - if(analyse.by %in% colnames(tacsat)){ - if(units == "all"){ yrs <- 0; mths <- 0; wks <- 0} - if(units == "year"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); mths <- 0; wks <- 0} - if(units == "month"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); mths <- sort(unique(month(tacsat$SI_DATIM))); wks <- 0} - if(units == "week"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); wks <- sort(unique(week(tacsat$SI_DATIM))); mths <- 0} - } else { stop("analyse.by statement not found as a column in the specified tacsat dataset")} - storeScheme <- expand.grid(years=yrs,months=mths,weeks=wks,analyse.by=unique(tacsat[,analyse.by])) - # Add upper and lower boundaries to storeScheme - storeScheme <- cbind(storeScheme,data.frame(lower=rep(0,nrow(storeScheme)), - 
upper=rep(0,nrow(storeScheme)))) - storeScheme$analyse.by <- ac(storeScheme$analyse.by) - storeScheme$success <- 0 - for(iRun in 1:nrow(storeScheme)){ - yr <- storeScheme[iRun,"years"] - mth <- storeScheme[iRun,"months"] - wk <- storeScheme[iRun,"weeks"] - aby <- storeScheme[iRun,"analyse.by"] - if(analyse.by == "VE_REF"){ - if(nrow(storeScheme)==1){ sTacsat <- tacsat - } else { - if(mth == 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & VE_REF == aby) - if(mth == 0 & wk != 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & week( tacsat$SI_DATIM) == wk & VE_REF == aby) - if(mth != 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & month(tacsat$SI_DATIM) == mth & VE_REF == aby) - } - } - if(analyse.by == "LE_GEAR"){ - if(nrow(storeScheme)==1){ sTacsat <- tacsat - } else { - if(mth == 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & LE_GEAR == aby) - if(mth == 0 & wk != 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & week( tacsat$SI_DATIM) == wk & LE_GEAR == aby) - if(mth != 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & month(tacsat$SI_DATIM) == mth & LE_GEAR == aby) - } - } - if(analyse.by == "VE_REF_LE_GEAR"){ - if(nrow(storeScheme)==1){ sTacsat <- tacsat - } else { - if(mth == 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & VE_REF_LE_GEAR == aby ) - if(mth == 0 & wk != 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & week( tacsat$SI_DATIM) == wk & VE_REF_LE_GEAR == aby) - if(mth != 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & month(tacsat$SI_DATIM) == mth & VE_REF_LE_GEAR == aby) - } - } - - #Check if there is any data available - if(nrow(sTacsat)>0){ - - #Check if you want to run with calculated or instantaneous speed - if(speed == "calculated"){ - sTacsat <- sortTacsat(sTacsat) - sTacsat$SI_SP_ORIG <- sTacsat$SI_SP - if("FT_REF" %in% colnames(tacsat)){ 
sTacsat$SI_SP <- calculateSpeed(sTacsat,level="trip", weight=c(0.5,0.5), fill.na=TRUE)$SI_SPCA - } else { sTacsat$SI_SP <- calculateSpeed(sTacsat,level="vessel", weight=c(0.5,0.5), fill.na=TRUE)$SI_SPCA} - } - #Remove records where SI_SP is NA - sTacsat <- sTacsat[which(is.na(sTacsat$SI_SP)==F & sTacsat$SI_SP > 0 & sTacsat$SI_SP <= 10),] - - - minRows <- 20 #Based on a scan of sub tacsat datasets, 5% percentile was approx 20 - - #Exception exercise: If number of rows in sTacsat is really low, return NA - if(nrow(sTacsat) <= minRows){ - storeScheme[iRun,"lower"] <- NA - storeScheme[iRun,"upper"] <- NA - } - - #Regular case: If number of rows is enough to fit segmented regression - if(nrow(sTacsat) > minRows){ - - #Define starting values for segmented regression - hi <- hist(sTacsat$SI_SP,breaks=diff(c(floor( range(sTacsat$SI_SP[is.finite(sTacsat$SI_SP)],na.rm=TRUE)[1]), - ceiling(range(sTacsat$SI_SP[is.finite(sTacsat$SI_SP)],na.rm=TRUE)[2]))),plot=FALSE) - acc <- diff(diff(cumsum(hi$counts))) - idx <- rev(sort(abs(diff(diff(cumsum(hi$counts))))))[1:2] - cnts<- which(abs(acc) %in% idx)+2 #Taking twice diff, so add 2 to get back to counts - psi <- list(x=range(hi$breaks[cnts])) #First guess on breakpoints - psiOrig <- psi - dat <- data.frame(x=sort(sTacsat$SI_SP[sTacsat$SI_SP>0]), - y=1:length(sTacsat$SI_SP[sTacsat$SI_SP>0])) - if(logfit==TRUE) dat <- data.frame(x= an(rep(names(table(sTacsat$SI_SP[sTacsat$SI_SP>0])), - ceiling(log(table(sTacsat$SI_SP[sTacsat$SI_SP>0]))))), - y=1:length(rep(names(table(sTacsat$SI_SP[sTacsat$SI_SP>0])), - ceiling(log(table(sTacsat$SI_SP[sTacsat$SI_SP>0])))))) - o <- 1 ; class(o) <- "try-error" ; count <- 0 ; bound1 <-NULL ; bound2 <- NULL; - - #Fit the model - while(class(o)=="try-error"){ - count <- count+1 - o <- try( - segmented(lm(y~x, data=dat) , seg.Z=~x , psi=psi, control= seg.control(display = FALSE, it.max=50, h=1)), # with 2 starting guesses - silent=TRUE) # the second breakpoint is quite uncertain and could lead to failure 
so... - if(!"try-error" %in% class(o)) break else psi <- list(x=c(sort(runif(2,min=range(sTacsat$SI_SP,na.rm=TRUE)[1],max=range(sTacsat$SI_SP,na.rm=TRUE)[2])))) # searching decreasing by 1 each time - if(count>20) {bound1 <- psiOrig$x[1]; bound2 <- psiOrig$x[2] ; cat("failure of the segmented regression for",paste(c("year","month","week","analyse.by"),storeScheme[iRun,1:4]),"\n"); break} - } - #Calculate the bounds and whether the fit has been successful or not - if(is.null(bound1)==T & is.null(bound2)==TRUE){ - bound1 <- max(range(sTacsat$SI_SP)[1],min(confint(o,level=CI)[,grep("low",colnames(confint(o)))])) - bound2 <- min(range(sTacsat$SI_SP)[2],max(confint(o,level=CI)[,grep("up", colnames(confint(o)))])) - if(class(o)[1] != "try-error") storeScheme[iRun,"success"] <- 1 - } - - #Save the bounds - if(is.null(forceLowerBound)==FALSE) - bound1 <- forceLowerBound - if(bound2 < bound1) - bound2 <- bound1 - storeScheme[iRun,"lower"] <- bound1 - storeScheme[iRun,"upper"] <- bound2 - - tacsatOrig$SI_STATE[sTacsat$idxFun[which(sTacsat$SI_SP >= storeScheme[iRun,"lower"] & sTacsat$SI_SP <= storeScheme[iRun,"upper"])]] <- "f" #Fishing - tacsatOrig$SI_STATE[sTacsat$idxFun[which(sTacsat$SI_SP < storeScheme[iRun,"lower"] | sTacsat$SI_SP > storeScheme[iRun,"upper"])]] <- "nf" #Steaming / in harbour - } - } - } - #Write the results to file and display the success rates - write.csv(storeScheme,file=file.path(saveDir,"storeScheme.csv")) - cat("Successful segmented regression fits",length(which(storeScheme$success==1)),"\n", - "versus unsuccessful fits",length(which(storeScheme$success == 0)),"\n\n", - "Check ",file.path(saveDir,"storeScheme.csv"),"for details \n\n") - - cat("Note: fishing = f, no fishing = nf\n") -return(tacsatOrig[,-grep("idxFun",colnames(tacsatOrig))])} - -#res <- segmentedTacsatSpeed(tacsat,units="year",analyse.by="VE_REF",speed="calculated",logfit=FALSE,CI=0.95) - - +#' Define activity based on segmented regression of speed profile +#' +#' Given the speed 
profile by gear or vessel in a user defined time frame a +#' segmented regression analyses will be performed to indicate where the +#' fishing speeds are located. The segmented regression takes place on the +#' cumulative speed profile. +#' +#' To fit a speed profile at least 20 VMS pings must exist. The function highly +#' depends on accurate starting points. After 20 random tries the fitting +#' procedure exits and returns a '0' success. +#' +#' @param tacsat A tacsat dataset (with optional column "LE_GEAR" when matched +#' to eflalo) +#' @param units Analyse by: "year", "month" and "week". "month" and "week" +#' cannot be used at same time. +#' @param analyse.by Analyse tacsat by gear ("LE_GEAR"), vessel ("VE_REF") or a +#' combination of gear and vessel ("VE_REF+LE_GEAR"). +#' @param speed Define if speed profile used must be taken as given in the +#' tacsat file (speed = "instantaneous") or if internally speed must be +#' calculated (speed = "calculated"). Default is "calculated" +#' @param logfit Logical. Define whether the speed profile frequencies must be +#' log-transformed. Default is FALSE. +#' @param CI Define confidence interval for calculated segmented break points. +#' Default is 0.95. +#' @param saveDir Directory to save overview and success of fit. Default = +#' tempdir(). +#' @param forceLowerBound Fix the lower breakpoint value at the forceLowerBound +#' given. If not specified, lower breakpoint is estimated. +#' @return SI_STATE = nf for no-fishing and SI_STATE = f for fishing +#' @author Niels T. Hintzen, Francois Bastardie +#' @seealso \code{\link{activityTacsatAnalyse}}, \code{\link{activityTacsat}} +#' @references Bastardie et al. 
2010 +#' @examples +#' +#' data(tacsat) +#' tacsat <- tacsat[1:20000,] +#' +#' #-Fit based on vessel and calculated speed +#' newTacsat <- segmentedTacsatSpeed(tacsat,units="year",analyse.by="VE_REF", +#' speed="calculated",logfit=FALSE,CI=0.95) +#' +#' data(eflalo) +#' tacsatp <- mergeEflalo2Tacsat(eflalo,tacsat) +#' tacsatp$LE_GEAR <- eflalo$LE_GEAR[match(tacsatp$FT_REF,eflalo$FT_REF)] +#' +#' #-Fit based on gear and instantaneous speed +#' newTacsat <- segmentedTacsatSpeed(tacsatp,units="year",analyse.by="LE_GEAR", +#' speed="instantaneous",logfit=FALSE,CI=0.95) +#' +#' +#' @export segmentedTacsatSpeed +segmentedTacsatSpeed <- function(tacsat,units="year",analyse.by="VE_REF",speed="calculated",logfit=FALSE,CI=0.95,saveDir=tempdir(),forceLowerBound=NULL){ + + require(segmented) + tacsat$idxFun <- 1:nrow(tacsat) + + #Regular checks + if(!'SI_STATE' %in% colnames(tacsat)) tacsat$SI_STATE <- NA + if(!"SI_DATIM" %in% colnames(tacsat)) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") + if(analyse.by == "LE_GEAR"){ if(!"LE_GEAR" %in% colnames(tacsat)) stop("Provide gear type (as column 'LE_GEAR' and if unknown, provide it as 'MIS'")} + if(!analyse.by %in% c("LE_GEAR","VE_REF","VE_REF+LE_GEAR")) warning("Analysing by unknown column variable, please check!") + if(analyse.by == "VE_REF+LE_GEAR"){ + if(!"VE_REF" %in% colnames(tacsat) | !"LE_GEAR" %in% colnames(tacsat)) stop("VE_REF and LE_GEAR not both in tacsat available") + tacsat$VE_REF_LE_GEAR <- paste(tacsat$VE_REF,tacsat$LE_GEAR) + analyse.by <- "VE_REF_LE_GEAR" + } + + tacsatOrig <- tacsat + if(analyse.by %in% colnames(tacsat)){ + if(units == "all"){ yrs <- 0; mths <- 0; wks <- 0} + if(units == "year"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); mths <- 0; wks <- 0} + if(units == "month"){ yrs <- sort(unique(format(tacsat$SI_DATIM,"%Y"))); mths <- sort(unique(month(tacsat$SI_DATIM))); wks <- 0} + if(units == "week"){ yrs <- 
sort(unique(format(tacsat$SI_DATIM,"%Y"))); wks <- sort(unique(week(tacsat$SI_DATIM))); mths <- 0} + } else { stop("analyse.by statement not found as a column in the specified tacsat dataset")} + storeScheme <- expand.grid(years=yrs,months=mths,weeks=wks,analyse.by=unique(tacsat[,analyse.by])) + # Add upper and lower boundaries to storeScheme + storeScheme <- cbind(storeScheme,data.frame(lower=rep(0,nrow(storeScheme)), + upper=rep(0,nrow(storeScheme)))) + storeScheme$analyse.by <- ac(storeScheme$analyse.by) + storeScheme$success <- 0 + for(iRun in 1:nrow(storeScheme)){ + yr <- storeScheme[iRun,"years"] + mth <- storeScheme[iRun,"months"] + wk <- storeScheme[iRun,"weeks"] + aby <- storeScheme[iRun,"analyse.by"] + if(analyse.by == "VE_REF"){ + if(nrow(storeScheme)==1){ sTacsat <- tacsat + } else { + if(mth == 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & VE_REF == aby) + if(mth == 0 & wk != 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & week( tacsat$SI_DATIM) == wk & VE_REF == aby) + if(mth != 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & month(tacsat$SI_DATIM) == mth & VE_REF == aby) + } + } + if(analyse.by == "LE_GEAR"){ + if(nrow(storeScheme)==1){ sTacsat <- tacsat + } else { + if(mth == 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & LE_GEAR == aby) + if(mth == 0 & wk != 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & week( tacsat$SI_DATIM) == wk & LE_GEAR == aby) + if(mth != 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & month(tacsat$SI_DATIM) == mth & LE_GEAR == aby) + } + } + if(analyse.by == "VE_REF_LE_GEAR"){ + if(nrow(storeScheme)==1){ sTacsat <- tacsat + } else { + if(mth == 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & VE_REF_LE_GEAR == aby ) + if(mth == 0 & wk != 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & week( tacsat$SI_DATIM) == wk & VE_REF_LE_GEAR == aby) 
+ if(mth != 0 & wk == 0) sTacsat <- subset(tacsat,format(tacsat$SI_DATIM,"%Y") == yr & month(tacsat$SI_DATIM) == mth & VE_REF_LE_GEAR == aby) + } + } + + #Check if there is any data available + if(nrow(sTacsat)>0){ + + #Check if you want to run with calculated or instantaneous speed + if(speed == "calculated"){ + sTacsat <- sortTacsat(sTacsat) + sTacsat$SI_SP_ORIG <- sTacsat$SI_SP + if("FT_REF" %in% colnames(tacsat)){ sTacsat$SI_SP <- calculateSpeed(sTacsat,level="trip", weight=c(0.5,0.5), fill.na=TRUE)$SI_SPCA + } else { sTacsat$SI_SP <- calculateSpeed(sTacsat,level="vessel", weight=c(0.5,0.5), fill.na=TRUE)$SI_SPCA} + } + #Remove records where SI_SP is NA + sTacsat <- sTacsat[which(is.na(sTacsat$SI_SP)==F & sTacsat$SI_SP > 0 & sTacsat$SI_SP <= 10),] + + + minRows <- 20 #Based on a scan of sub tacsat datasets, 5% percentile was approx 20 + + #Exception exercise: If number of rows in sTacsat is really low, return NA + if(nrow(sTacsat) <= minRows){ + storeScheme[iRun,"lower"] <- NA + storeScheme[iRun,"upper"] <- NA + } + + #Regular case: If number of rows is enough to fit segmented regression + if(nrow(sTacsat) > minRows){ + + #Define starting values for segmented regression + hi <- hist(sTacsat$SI_SP,breaks=diff(c(floor( range(sTacsat$SI_SP[is.finite(sTacsat$SI_SP)],na.rm=TRUE)[1]), + ceiling(range(sTacsat$SI_SP[is.finite(sTacsat$SI_SP)],na.rm=TRUE)[2]))),plot=FALSE) + acc <- diff(diff(cumsum(hi$counts))) + idx <- rev(sort(abs(diff(diff(cumsum(hi$counts))))))[1:2] + cnts<- which(abs(acc) %in% idx)+2 #Taking twice diff, so add 2 to get back to counts + psi <- list(x=range(hi$breaks[cnts])) #First guess on breakpoints + psiOrig <- psi + dat <- data.frame(x=sort(sTacsat$SI_SP[sTacsat$SI_SP>0]), + y=1:length(sTacsat$SI_SP[sTacsat$SI_SP>0])) + if(logfit==TRUE) dat <- data.frame(x= an(rep(names(table(sTacsat$SI_SP[sTacsat$SI_SP>0])), + ceiling(log(table(sTacsat$SI_SP[sTacsat$SI_SP>0]))))), + y=1:length(rep(names(table(sTacsat$SI_SP[sTacsat$SI_SP>0])), + 
ceiling(log(table(sTacsat$SI_SP[sTacsat$SI_SP>0])))))) + o <- 1 ; class(o) <- "try-error" ; count <- 0 ; bound1 <-NULL ; bound2 <- NULL; + + #Fit the model + while(class(o)=="try-error"){ + count <- count+1 + o <- try( + segmented(lm(y~x, data=dat) , seg.Z=~x , psi=psi, control= seg.control(display = FALSE, it.max=50, h=1)), # with 2 starting guesses + silent=TRUE) # the second breakpoint is quite uncertain and could lead to failure so... + if(!"try-error" %in% class(o)) break else psi <- list(x=c(sort(runif(2,min=range(sTacsat$SI_SP,na.rm=TRUE)[1],max=range(sTacsat$SI_SP,na.rm=TRUE)[2])))) # searching decreasing by 1 each time + if(count>20) {bound1 <- psiOrig$x[1]; bound2 <- psiOrig$x[2] ; cat("failure of the segmented regression for",paste(c("year","month","week","analyse.by"),storeScheme[iRun,1:4]),"\n"); break} + } + #Calculate the bounds and whether the fit has been successful or not + if(is.null(bound1)==T & is.null(bound2)==TRUE){ + bound1 <- max(range(sTacsat$SI_SP)[1],min(confint(o,level=CI)[,grep("low",colnames(confint(o)))])) + bound2 <- min(range(sTacsat$SI_SP)[2],max(confint(o,level=CI)[,grep("up", colnames(confint(o)))])) + if(class(o)[1] != "try-error") storeScheme[iRun,"success"] <- 1 + } + + #Save the bounds + if(is.null(forceLowerBound)==FALSE) + bound1 <- forceLowerBound + if(bound2 < bound1) + bound2 <- bound1 + storeScheme[iRun,"lower"] <- bound1 + storeScheme[iRun,"upper"] <- bound2 + + tacsatOrig$SI_STATE[sTacsat$idxFun[which(sTacsat$SI_SP >= storeScheme[iRun,"lower"] & sTacsat$SI_SP <= storeScheme[iRun,"upper"])]] <- "f" #Fishing + tacsatOrig$SI_STATE[sTacsat$idxFun[which(sTacsat$SI_SP < storeScheme[iRun,"lower"] | sTacsat$SI_SP > storeScheme[iRun,"upper"])]] <- "nf" #Steaming / in harbour + } + } + } + #Write the results to file and display the success rates + write.csv(storeScheme,file=file.path(saveDir,"storeScheme.csv")) + cat("Successful segmented regression fits",length(which(storeScheme$success==1)),"\n", + "versus unsuccessful 
fits",length(which(storeScheme$success == 0)),"\n\n", + "Check ",file.path(saveDir,"storeScheme.csv"),"for details \n\n") + + cat("Note: fishing = f, no fishing = nf\n") +return(tacsatOrig[,-grep("idxFun",colnames(tacsatOrig))])} + +#res <- segmentedTacsatSpeed(tacsat,units="year",analyse.by="VE_REF",speed="calculated",logfit=FALSE,CI=0.95) + + diff --git a/vmstools/R/selectMainSpecies.r b/vmstools/R/selectMainSpecies.r index fcf205c..278717e 100644 --- a/vmstools/R/selectMainSpecies.r +++ b/vmstools/R/selectMainSpecies.r @@ -1,316 +1,426 @@ -################################################################################ -# EXPLORE THE SELECTED SPECIES DEPENDING ON THE METHOD (HAC, TOTALE, LOGEVENT) # -# AND THE THRESHOLD CHOSEN # -################################################################################ - -selectMainSpecies=function(dat,analysisName="",RunHAC=TRUE,DiagFlag=FALSE){ - - require(FactoMineR) # function PCA - require(cluster) # functions pam & clara - require(SOAR) # function Store - require(amap) # function hcluster - require(MASS) # function lda - require(mda) # function fda - - p=ncol(dat) # Number of species - n=nrow(dat) - - # Transform quantities to proportions of total quantity caught by logevent - print("calculating proportions...") - - propdat=transformation_proportion(dat[,2:p]) - nameSpecies=colnames(propdat) - nbAllSpecies=ncol(propdat) - - t1=Sys.time() - - if (RunHAC == TRUE) { - - # METHOD : 'HAC' - - print("######## SPECIES EXPLORATION METHOD 1: HAC ########") - # Transposing data - table_var=table_variables(propdat) - - # HAC - print("cluster...") - cah_var=hcluster(table_var, method="euclidean", link="ward") - - Store(objects()) - gc(reset=TRUE) - - # Select the number of clusters by scree-test - inerties.vector=cah_var$height[order(cah_var$height,decreasing=TRUE)] - nb.finalclusters=which(scree(inerties.vector)[,"epsilon"]<0)[1] - - if(!is.na(nb.finalclusters)){ - # Dendogram cutting at the selected level - 
cah_cluster_var=cutree(cah_var,k=nb.finalclusters) - - png(paste(analysisName,"HAC_Dendogram_Step1.png",sep="_"), width = 1200, height = 800) - plot(cah_var,labels=FALSE,hang=-1,ann=FALSE) - title(main="HAC dendogram",xlab="Species",ylab="Height") - rect.hclust(cah_var, k=nb.finalclusters) - dev.off() - - temp=select_species(dat[,2:p],cah_cluster_var) - namesResidualSpecies=nameSpecies[which(cah_cluster_var==temp[[2]])] #list of residual species - - fait=FALSE - nb_cut=1 - while ((fait == FALSE) && (nb_cut < (p-nb.finalclusters-2))) { - # cutting below - print(paste("----------- nb_cut =",nb_cut)) - cah_cluster_var_step=cutree(cah_var,k=(nb.finalclusters+nb_cut)) - # testing residual species - print(paste("------------- Residual species cluster(s) ",unique(cah_cluster_var_step[namesResidualSpecies]))) - if (length(unique(cah_cluster_var_step[namesResidualSpecies]))==1) { - print(paste("------------- No residual cut -----")) - nb_cut = nb_cut+1 # cutting below - }else{ - print("------------- Residual cut -----") - nbSpeciesClusters=table(cah_cluster_var_step[namesResidualSpecies]) - # testing if a species is alone in a group - if (sort(nbSpeciesClusters)[1]>1) { # if not alone - print("------- I stop and have a beer ------") - fait = TRUE # then I stop - }else{ - print("------ Updating residual species -----") - nb_cut = nb_cut+1; # if alone then cutting below and updating nameResidualSpecies to start again - numGroupSpeciesAlone = as.numeric(names(sort(nbSpeciesClusters)[1])) - namesSpeciesAlone = names(cah_cluster_var_step)[which(cah_cluster_var_step==numGroupSpeciesAlone)] - namesResidualSpecies = namesResidualSpecies[ - which(namesResidualSpecies==namesSpeciesAlone)] - print(paste("---- Adding new species ---",namesSpeciesAlone)) - } - } - } # end of while - - - # If all species are selected step by step, the final k is the initial cut (nb.finalclusters) - if((nb.finalclusters+nb_cut)>=(p-2)){ - kFinal=nb.finalclusters - cah_cluster_var=cutree(cah_var,k=kFinal) 
- temp=select_species(dat[,2:p],cah_cluster_var) - namesResidualSpecies=nameSpecies[which(cah_cluster_var==temp[[2]])] #list of residual species - } - - - # Dendogram of the first cut in the residual species cluster - png(paste(analysisName,"HAC_Dendogram_Step1_ResidualSpecies.png",sep="_"), width = 1200, height = 800) - plot(cah_var,labels=FALSE,hang=-1,ann=FALSE) - title(main="HAC dendogram - Step",xlab="Species",ylab="Height") - if((nb.finalclusters+nb_cut)>=(p-2)){ - rect.hclust(cah_var, k=kFinal) - }else{ - rect.hclust(cah_var, k=(nb.finalclusters+nb_cut)) - } - dev.off() - - # Selection of main species - nomespsel=setdiff(nameSpecies,namesResidualSpecies) - cat("main species : ",nomespsel,"\n") - - # Return the dataset retaining only the main species - nbMainSpeciesHAC=length(nomespsel) - namesMainSpeciesHAC=nomespsel - propNbMainSpeciesHAC=nbMainSpeciesHAC/nbAllSpecies*100 - - if(DiagFlag==TRUE) { - datSpeciesWithoutProp=building_tab_pca(dat[,2:p],nomespsel) - pourcentCatchMainSpeciesHAC=apply(datSpeciesWithoutProp,1,sum)/apply(dat[,2:p],1,sum)*100 - medianPourcentCatchMainSpeciesHAC=median(pourcentCatchMainSpeciesHAC) - } - - Store(objects()) - gc(reset=TRUE) - - } else { - namesMainSpeciesHAC=NA; nbMainSpeciesHAC=as.numeric(NA); medianPourcentCatchMainSpeciesHAC=as.numeric(NA); propNbMainSpeciesHAC=NA - } - - print(Sys.time()-t1) - - }else{ namesMainSpeciesHAC=NA; nbMainSpeciesHAC=as.numeric(NA); medianPourcentCatchMainSpeciesHAC=as.numeric(NA); propNbMainSpeciesHAC=NA } - - - # METHOD : 'TOTALE' - - print("######## SPECIES EXPLORATION METHOD 2: 'TOTAL' ########") - - # Total quantity caught by species - sumcol=numeric(length=p-1) - for(i in 2:p){ - sumcol[i-1]=sum(dat[,i], na.rm=TRUE) - } - names(sumcol)=names(dat)[-1] - - # Percent of each species in the total catch - propesp=sumcol/sum(sumcol,na.rm=TRUE)*100 - # Columns number of each species by decreasing order of capture - numesp=order(propesp,decreasing=TRUE) - # Percent of each species in the total 
catch by cumulated decreasing order - propesp=cumsum(propesp[order(propesp,decreasing=TRUE)]) - - # We are taking all species until having at least seuil% of total catch - nbMainSpeciesTotal=numeric() - medianPourcentCatchMainSpeciesTotal=numeric() - - for(seuil in seq(5,100,5)){ - cat("seuil:",seuil,"\n") - pourcent=which(propesp<=seuil) - # We are taking the name of selected species - espsel=numesp[1:(length(pourcent)+1)] - nomespsel=nameSpecies[espsel] - nbMainSpeciesTotal[seuil/5]=length(nomespsel) - - if(DiagFlag==TRUE) { - # We are building the table with main species and aggregated other species - datSpeciesWithoutProp=building_tab_pca(dat[,2:p],nomespsel) - if(length(nomespsel)==1){ - vectorNul=rep(0,n) - datSpeciesWithoutProp=cbind(datSpeciesWithoutProp,vectorNul) - } - pourcentCatchMainSpeciesTotal=apply(datSpeciesWithoutProp,1,sum, na.rm=TRUE)/apply(dat[,2:p],1,sum, na.rm=TRUE)*100 - medianPourcentCatchMainSpeciesTotal[seuil/5]=median(pourcentCatchMainSpeciesTotal) - } - } - nbMainSpeciesTotal=c(0,nbMainSpeciesTotal) - nbMainSpeciesTotal[length(nbMainSpeciesTotal)]=p-1 - namesMainSpeciesTotal=nomespsel[1:nbMainSpeciesTotal[length(nbMainSpeciesTotal)-1]] - propNbMainSpeciesTotal=nbMainSpeciesTotal[length(nbMainSpeciesTotal)-1]/nbAllSpecies*100 - - if (DiagFlag) medianPourcentCatchMainSpeciesTotal=c(0,medianPourcentCatchMainSpeciesTotal) - - Store(objects()) - gc(reset=TRUE) - - print(Sys.time()-t1) - - - - # METHOD : 'LOGEVENT' - - print("######## SPECIES EXPLORATION METHOD 3: 'LOGEVENT' ########") - - nbMainSpeciesLogevent=numeric() - medianPourcentCatchMainSpeciesLogevent=numeric() - - for(seuil in seq(5,100,5)){ - cat("seuil:",seuil,"\n") - nomespsel=character() - # We are taking all species with a % of catch >= seuil% for at least one logevent - for (i in nameSpecies) if (!is.na(any(propdat[,i]>=seuil)) && any(propdat[,i]>=seuil)) nomespsel <- c(nomespsel,i) - nbMainSpeciesLogevent[seuil/5]=length(nomespsel) - - # We are building the table with main 
species and aggregated other species - if(DiagFlag==TRUE) { - datSpeciesWithoutProp=building_tab_pca(dat[,2:p],nomespsel) - if(length(nomespsel)==1){ - vectorNul=rep(0,n) - datSpeciesWithoutProp=cbind(datSpeciesWithoutProp,vectorNul) - } - pourcentCatchMainSpeciesLogevent=apply(datSpeciesWithoutProp,1,sum)/apply(dat[,2:p],1,sum)*100 - medianPourcentCatchMainSpeciesLogevent[seuil/5]=median(pourcentCatchMainSpeciesLogevent) - } - } - nbMainSpeciesLogevent=c(p-1,nbMainSpeciesLogevent) - namesMainSpeciesLogevent=nomespsel - propNbMainSpeciesLogevent=nbMainSpeciesLogevent[length(nbMainSpeciesLogevent)]/nbAllSpecies*100 - - - if(DiagFlag) medianPourcentCatchMainSpeciesLogevent=c(100,medianPourcentCatchMainSpeciesLogevent) - - print(Sys.time()-t1) - - # GRAPHICS - - # Number of main species - X11(5,5) - plot(seq(0,100,5),nbMainSpeciesTotal,type='l',col="blue",lwd=3, axes=FALSE, xlab="Threshold (%)",ylab="Number of species") - lines(seq(0,100,5),nbMainSpeciesLogevent,col="green",lwd=3) - if(!is.na(nbMainSpeciesHAC)) segments(0,nbMainSpeciesHAC,100,nbMainSpeciesHAC,col="red",lwd=3) - axis(1) - axis(2, las=2) - box() - legend(20, p*0.9, c( "HAC", "PerTotal", "PerLogevent"),lwd=3,col=c("red", "blue", "green"),bty="n") - savePlot(filename = paste(analysisName,'Number of main species',sep="_"),type ="png") - dev.off() - - X11(5,5) - plot(seq(0,100,5),nbMainSpeciesTotal,type='l',col="blue",lwd=3, axes=FALSE, xlab="Threshold (%)",ylab="Number of species") - lines(seq(0,100,5),nbMainSpeciesLogevent,col="green",lwd=3) - if(!is.na(nbMainSpeciesHAC)) segments(0,nbMainSpeciesHAC,100,nbMainSpeciesHAC,col="red",lwd=3) - axis(1) - axis(2, las=2) - box() - legend(20, p*0.9, c( "HAC", "PerTotal", "PerLogevent"),lwd=3,col=c("red", "blue", "green"),bty="n") - savePlot(filename = paste(analysisName,'Number of main species',sep="_"),type ="png") - dev.off() - - # Black and white version - X11(5,5) - plot(seq(0,100,5),nbMainSpeciesTotal, type='l' ,lty='dashed', col="black",lwd=3, axes=FALSE, 
xlab="Threshold (%)",ylab="Number of species") - lines(seq(0,100,5),nbMainSpeciesLogevent, type='l', lty='dotted', col="black",lwd=3) - if(!is.na(nbMainSpeciesHAC)) segments(0,nbMainSpeciesHAC,100,nbMainSpeciesHAC,col="black",lwd=3) - axis(1) - axis(2, las=2) - box() - legend(20, p*0.9, c( "HAC", "PerTotal", "PerLogevent"),lwd=3,col=c("black", "black", "black"),bty="n",lty=c('solid','dashed','dotted'),box.lty = par("lty")) - savePlot(filename = paste(analysisName,'Number of main species_new_2',sep="_"),type ="png") - dev.off() - - # Median percentage of catch represented by main species by logevent - if(DiagFlag){ - png(paste(analysisName,"Median percentage of catch represented by main species by logevent.png",sep="_"), width = 1200, height = 800) - plot(seq(0,100,5),medianPourcentCatchMainSpeciesTotal,type='l',col="blue",lwd=2, main="Median percentage of catch represented by main species by logevent depending of the threshold", xlab="Threshold (%)",ylab="Median percentage of catch represented by main species by logevent") - lines(seq(0,100,5),medianPourcentCatchMainSpeciesLogevent,col="green",lwd=2) - if (RunHAC==TRUE) abline(medianPourcentCatchMainSpeciesHAC,0, col="red",lwd=2) - mtext(paste(p-1," Species"),col='darkblue') - if (RunHAC==TRUE) legend(70, 40, c("HAC", "Total", "Logevent"),lwd=2,col=c("red", "blue", "green")) - if (RunHAC==FALSE) legend(70, 40, c("Total", "Logevent"),lwd=2,col=c("blue", "green")) - dev.off() - } - - listSpecies=sort(unique(c(namesMainSpeciesHAC,namesMainSpeciesTotal,namesMainSpeciesLogevent))) - - # Proportion of the total catch represented by the species in listSpecies (= namesMainSpeciesAll) - catchListSpecies=sumcol[listSpecies] - propCatchListSpecies=sum(catchListSpecies)/sum(sumcol)*100 - - - if(DiagFlag==FALSE) { - explo_species = list(nbAllSpecies=nbAllSpecies, - propNbMainSpeciesHAC=propNbMainSpeciesHAC, - propNbMainSpeciesTotal=propNbMainSpeciesTotal, - propNbMainSpeciesLogevent=propNbMainSpeciesLogevent, - 
nbMainSpeciesHAC=nbMainSpeciesHAC, - nbMainSpeciesTotal=nbMainSpeciesTotal, - nbMainSpeciesLogevent=nbMainSpeciesLogevent, - namesMainSpeciesHAC=sort(namesMainSpeciesHAC), - namesMainSpeciesTotalAlphabetical=sort(namesMainSpeciesTotal), - namesMainSpeciesTotalByImportance=namesMainSpeciesTotal, - namesMainSpeciesLogevent=sort(namesMainSpeciesLogevent), - namesMainSpeciesAll=listSpecies, - propCatchMainSpeciesAll=propCatchListSpecies) - }else{ - explo_species = list(nbAllSpecies=nbAllSpecies, - propNbMainSpeciesHAC=propNbMainSpeciesHAC, - propNbMainSpeciesTotal=propNbMainSpeciesTotal, - propNbMainSpeciesLogevent=propNbMainSpeciesLogevent, - nbMainSpeciesHAC=nbMainSpeciesHAC, - nbMainSpeciesTotal=nbMainSpeciesTotal, - nbMainSpeciesLogevent=nbMainSpeciesLogevent, - namesMainSpeciesHAC=sort(namesMainSpeciesHAC), - namesMainSpeciesTotalAlphabetical=sort(namesMainSpeciesTotal), - namesMainSpeciesTotalByImportance=namesMainSpeciesTotal, - namesMainSpeciesLogevent=sort(namesMainSpeciesLogevent), - namesMainSpeciesAll=listSpecies, - medianPourcentCatchMainSpeciesHAC=median(pourcentCatchMainSpeciesHAC), - medianPourcentCatchMainSpeciesTotal=medianPourcentCatchMainSpeciesTotal, - medianPourcentCatchMainSpeciesLogevent=medianPourcentCatchMainSpeciesLogevent, - propCatchMainSpeciesAll=propCatchListSpecies) - } - - return(explo_species) - -} +################################################################################ +# EXPLORE THE SELECTED SPECIES DEPENDING ON THE METHOD (HAC, TOTALE, LOGEVENT) # +# AND THE THRESHOLD CHOSEN # +################################################################################ + + + +#' Identyfing in an EFLALO dataset which species can be considered as important +#' for the analysis of target species, by crossing three different approaches. +#' +#' A typical logbook dataset will contain a large number of species recorded, +#' but only a limited number of these could potentially be considered as target +#' species. 
This function aims thus at identifying these by using three +#' different approaches : - HAC (Hierarchical Ascending Classification) based +#' on Euclidian distances between species with Ward aggregating criteria; - +#' Total, where species are ranked based on their proportion in the total +#' catch, and those cumulating to a given percentage are retained - Logevent, +#' where species are selected if they represent at least a given percentage of +#' the logevent's catch for at least one logevent (one line) +#' +#' The HAC and the Logevent methods work on catch data transformed in +#' percentage of species by logevent (line), in order to remove the effect of +#' large hauls compared to small hauls. The Total method works on raw data. In +#' the HAC method, a first group of species, the residual ones, is identified +#' by clustering and using a first-order scree test for cutting the tree. +#' Other species are pooled in the group of principals. New similar HACs are +#' run through a loop on this group of residuals species, to identify if any +#' new species might have been left aside in the first run. It is important to +#' note though that HAC method might quickly reach memory limits on standard +#' PCs, and may thus not be run on very large datasets. In the Total method, +#' the percentage threshold is being increased with 5\% steps from 5 to 100, +#' and the ranked species summing up to this value is recorded. In the +#' Logevent method, the percentage threshold is also being increased with 5\% +#' steps from 5 to 100, and all species representing at least this value in at +#' least one line are recorded. +#' +#' This function allows thus to explore the variability and the sensitivity of +#' the definition of key species to differences in concepts and subjective +#' thresholds. 
A plot showing the number of species retained according to these +#' two criteria is produced, allowing the user to make a qualitative choice +#' when this will be used for identifying metiers. Empirical experience led us +#' to suggest that the combination of species entering either the HAC, or 95\% +#' of the total cumulated catch, or 100\% of at least one logevent would be +#' sufficient to cover the main target species of a standard logbook dataset, +#' but other choices could be made. +#' +#' +#' @param dat a data.frame reduced from an eflalo format. It should contain +#' only the LE_ID (Logevent ID) variable as well as all species names in +#' columns, with raw catch data. It is necessary to sort out potential +#' error-prone lines (such as lines with only 0) prior to the analysis, and to +#' replace NA values by 0. +#' @param analysisName character, the name of the run. Used for the file name +#' of the plots. +#' @param RunHAC Boolean. In case of very large datasets, memory limits might +#' be reached with HAC method. This option allows thus to skip the method and +#' consider species selection using the two other methods only. +#' @param DiagFlag Boolean. If DiagFlag=TRUE, additional plots and diagnostics +#' are produced. Not very used. +#' @return The function produces a plot (saved in the "analysisName" working +#' directory showing the number of species selected according to the method and +#' the percentage threshold selected for both 'Total' and 'Logevent' methods. 
+#' The function returns also a list of diagnostics of the three methods : +#' \item{nbAllSpecies}{Number of species initially in the dataset } +#' \item{propNbMainSpeciesHAC}{Proportion of the number of species retained by +#' the HAC method to the total number of species } +#' \item{propNbMainSpeciesTotal}{Proportion of the number of species retained +#' by the Total method to the total number of species } +#' \item{propNbMainSpeciesLogevent}{Proportion of the number of species +#' retained by the Logevent method to the total number of species } +#' \item{nbMainSpeciesHAC}{Number of species retained by the HAC method } +#' \item{nbMainSpeciesTotal}{Number of species retained by the Total method +#' with a percentage threshold increasing at 5\% step from 5\% to 100\% } +#' \item{nbMainSpeciesLogevent}{Number of species retained by the Logevent +#' method with a percentage threshold increasing at 5\% step from 5\% to 100\% +#' } \item{namesMainSpeciesHAC}{Names of species retained by the HAC method } +#' \item{namesMainSpeciesTotalAlphabetical}{Names of species retained by the +#' Total method with 95\% threshold in alphabetical order, for easier +#' comparison with the two other methods } +#' \item{namesMainSpeciesTotalByImportance}{Names of species retained by the +#' Total method with 95\% threshold in ranked order of importance } +#' \item{namesMainSpeciesLogevent}{Names of species retained by the Logevent +#' method with 100\% threshold } \item{namesMainSpeciesAll}{Unique combination +#' of the species retained in either HAC method, Total method with 95\% +#' threshold, and Logevent method with 100\% threshold } +#' \item{propCatchMainSpeciesAll}{Proportion of the total catch represented by +#' the selected species (species in namesMainSpeciesAll) } +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, 
Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @seealso \code{\link{extractTableMainSpecies}} +#' @references Development of tools for logbook and VMS data analysis. Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @examples +#' +#' +#' +#' data(eflalo) +#' +#' eflalo <- formatEflalo(eflalo) +#' +#' eflalo <- eflalo[eflalo$LE_GEAR=="OTB",] +#' +#' # note that output plots will be sent to getwd() +#' analysisName <- "metier_analysis_OTB" +#' +#' dat <- eflalo[,c("LE_ID",grep("EURO",colnames(eflalo),value=TRUE))] +#' names(dat)[-1] <- unlist(lapply(strsplit(names(dat[,-1]),"_"),function(x) x[[3]])) +#' +#' explo <- selectMainSpecies(dat, analysisName, RunHAC=TRUE, DiagFlag=FALSE) +#' #=> send the LE_ID and LE_EURO_SP columns only +#' +#' @export selectMainSpecies +selectMainSpecies=function(dat,analysisName="",RunHAC=TRUE,DiagFlag=FALSE){ + + require(FactoMineR) # function PCA + require(cluster) # functions pam & clara + require(SOAR) # function Store + require(amap) # function hcluster + require(MASS) # function lda + require(mda) # function fda + + p=ncol(dat) # Number of species + n=nrow(dat) + + # Transform quantities to proportions of total quantity caught by logevent + print("calculating proportions...") + + propdat=transformation_proportion(dat[,2:p]) + nameSpecies=colnames(propdat) + nbAllSpecies=ncol(propdat) + + t1=Sys.time() + + if (RunHAC == TRUE) { + + # METHOD : 'HAC' + + print("######## SPECIES EXPLORATION METHOD 1: HAC ########") + # Transposing data + table_var=table_variables(propdat) + + # HAC + print("cluster...") + cah_var=hcluster(table_var, method="euclidean", link="ward") + + Store(objects()) + gc(reset=TRUE) + + # Select the number of clusters by scree-test + inerties.vector=cah_var$height[order(cah_var$height,decreasing=TRUE)] + nb.finalclusters=which(scree(inerties.vector)[,"epsilon"]<0)[1] + + if(!is.na(nb.finalclusters)){ + # Dendogram cutting at the 
selected level + cah_cluster_var=cutree(cah_var,k=nb.finalclusters) + + png(paste(analysisName,"HAC_Dendogram_Step1.png",sep="_"), width = 1200, height = 800) + plot(cah_var,labels=FALSE,hang=-1,ann=FALSE) + title(main="HAC dendogram",xlab="Species",ylab="Height") + rect.hclust(cah_var, k=nb.finalclusters) + dev.off() + + temp=select_species(dat[,2:p],cah_cluster_var) + namesResidualSpecies=nameSpecies[which(cah_cluster_var==temp[[2]])] #list of residual species + + fait=FALSE + nb_cut=1 + while ((fait == FALSE) && (nb_cut < (p-nb.finalclusters-2))) { + # cutting below + print(paste("----------- nb_cut =",nb_cut)) + cah_cluster_var_step=cutree(cah_var,k=(nb.finalclusters+nb_cut)) + # testing residual species + print(paste("------------- Residual species cluster(s) ",unique(cah_cluster_var_step[namesResidualSpecies]))) + if (length(unique(cah_cluster_var_step[namesResidualSpecies]))==1) { + print(paste("------------- No residual cut -----")) + nb_cut = nb_cut+1 # cutting below + }else{ + print("------------- Residual cut -----") + nbSpeciesClusters=table(cah_cluster_var_step[namesResidualSpecies]) + # testing if a species is alone in a group + if (sort(nbSpeciesClusters)[1]>1) { # if not alone + print("------- I stop and have a beer ------") + fait = TRUE # then I stop + }else{ + print("------ Updating residual species -----") + nb_cut = nb_cut+1; # if alone then cutting below and updating nameResidualSpecies to start again + numGroupSpeciesAlone = as.numeric(names(sort(nbSpeciesClusters)[1])) + namesSpeciesAlone = names(cah_cluster_var_step)[which(cah_cluster_var_step==numGroupSpeciesAlone)] + namesResidualSpecies = namesResidualSpecies[ - which(namesResidualSpecies==namesSpeciesAlone)] + print(paste("---- Adding new species ---",namesSpeciesAlone)) + } + } + } # end of while + + + # If all species are selected step by step, the final k is the initial cut (nb.finalclusters) + if((nb.finalclusters+nb_cut)>=(p-2)){ + kFinal=nb.finalclusters + 
cah_cluster_var=cutree(cah_var,k=kFinal) + temp=select_species(dat[,2:p],cah_cluster_var) + namesResidualSpecies=nameSpecies[which(cah_cluster_var==temp[[2]])] #list of residual species + } + + + # Dendogram of the first cut in the residual species cluster + png(paste(analysisName,"HAC_Dendogram_Step1_ResidualSpecies.png",sep="_"), width = 1200, height = 800) + plot(cah_var,labels=FALSE,hang=-1,ann=FALSE) + title(main="HAC dendogram - Step",xlab="Species",ylab="Height") + if((nb.finalclusters+nb_cut)>=(p-2)){ + rect.hclust(cah_var, k=kFinal) + }else{ + rect.hclust(cah_var, k=(nb.finalclusters+nb_cut)) + } + dev.off() + + # Selection of main species + nomespsel=setdiff(nameSpecies,namesResidualSpecies) + cat("main species : ",nomespsel,"\n") + + # Return the dataset retaining only the main species + nbMainSpeciesHAC=length(nomespsel) + namesMainSpeciesHAC=nomespsel + propNbMainSpeciesHAC=nbMainSpeciesHAC/nbAllSpecies*100 + + if(DiagFlag==TRUE) { + datSpeciesWithoutProp=building_tab_pca(dat[,2:p],nomespsel) + pourcentCatchMainSpeciesHAC=apply(datSpeciesWithoutProp,1,sum)/apply(dat[,2:p],1,sum)*100 + medianPourcentCatchMainSpeciesHAC=median(pourcentCatchMainSpeciesHAC) + } + + Store(objects()) + gc(reset=TRUE) + + } else { + namesMainSpeciesHAC=NA; nbMainSpeciesHAC=as.numeric(NA); medianPourcentCatchMainSpeciesHAC=as.numeric(NA); propNbMainSpeciesHAC=NA + } + + print(Sys.time()-t1) + + }else{ namesMainSpeciesHAC=NA; nbMainSpeciesHAC=as.numeric(NA); medianPourcentCatchMainSpeciesHAC=as.numeric(NA); propNbMainSpeciesHAC=NA } + + + # METHOD : 'TOTALE' + + print("######## SPECIES EXPLORATION METHOD 2: 'TOTAL' ########") + + # Total quantity caught by species + sumcol=numeric(length=p-1) + for(i in 2:p){ + sumcol[i-1]=sum(dat[,i], na.rm=TRUE) + } + names(sumcol)=names(dat)[-1] + + # Percent of each species in the total catch + propesp=sumcol/sum(sumcol,na.rm=TRUE)*100 + # Columns number of each species by decreasing order of capture + numesp=order(propesp,decreasing=TRUE) 
+ # Percent of each species in the total catch by cumulated decreasing order + propesp=cumsum(propesp[order(propesp,decreasing=TRUE)]) + + # We are taking all species until having at least seuil% of total catch + nbMainSpeciesTotal=numeric() + medianPourcentCatchMainSpeciesTotal=numeric() + + for(seuil in seq(5,100,5)){ + cat("seuil:",seuil,"\n") + pourcent=which(propesp<=seuil) + # We are taking the name of selected species + espsel=numesp[1:(length(pourcent)+1)] + nomespsel=nameSpecies[espsel] + nbMainSpeciesTotal[seuil/5]=length(nomespsel) + + if(DiagFlag==TRUE) { + # We are building the table with main species and aggregated other species + datSpeciesWithoutProp=building_tab_pca(dat[,2:p],nomespsel) + if(length(nomespsel)==1){ + vectorNul=rep(0,n) + datSpeciesWithoutProp=cbind(datSpeciesWithoutProp,vectorNul) + } + pourcentCatchMainSpeciesTotal=apply(datSpeciesWithoutProp,1,sum, na.rm=TRUE)/apply(dat[,2:p],1,sum, na.rm=TRUE)*100 + medianPourcentCatchMainSpeciesTotal[seuil/5]=median(pourcentCatchMainSpeciesTotal) + } + } + nbMainSpeciesTotal=c(0,nbMainSpeciesTotal) + nbMainSpeciesTotal[length(nbMainSpeciesTotal)]=p-1 + namesMainSpeciesTotal=nomespsel[1:nbMainSpeciesTotal[length(nbMainSpeciesTotal)-1]] + propNbMainSpeciesTotal=nbMainSpeciesTotal[length(nbMainSpeciesTotal)-1]/nbAllSpecies*100 + + if (DiagFlag) medianPourcentCatchMainSpeciesTotal=c(0,medianPourcentCatchMainSpeciesTotal) + + Store(objects()) + gc(reset=TRUE) + + print(Sys.time()-t1) + + + + # METHOD : 'LOGEVENT' + + print("######## SPECIES EXPLORATION METHOD 3: 'LOGEVENT' ########") + + nbMainSpeciesLogevent=numeric() + medianPourcentCatchMainSpeciesLogevent=numeric() + + for(seuil in seq(5,100,5)){ + cat("seuil:",seuil,"\n") + nomespsel=character() + # We are taking all species with a % of catch >= seuil% for at least one logevent + for (i in nameSpecies) if (!is.na(any(propdat[,i]>=seuil)) && any(propdat[,i]>=seuil)) nomespsel <- c(nomespsel,i) + nbMainSpeciesLogevent[seuil/5]=length(nomespsel) + 
+ # We are building the table with main species and aggregated other species + if(DiagFlag==TRUE) { + datSpeciesWithoutProp=building_tab_pca(dat[,2:p],nomespsel) + if(length(nomespsel)==1){ + vectorNul=rep(0,n) + datSpeciesWithoutProp=cbind(datSpeciesWithoutProp,vectorNul) + } + pourcentCatchMainSpeciesLogevent=apply(datSpeciesWithoutProp,1,sum)/apply(dat[,2:p],1,sum)*100 + medianPourcentCatchMainSpeciesLogevent[seuil/5]=median(pourcentCatchMainSpeciesLogevent) + } + } + nbMainSpeciesLogevent=c(p-1,nbMainSpeciesLogevent) + namesMainSpeciesLogevent=nomespsel + propNbMainSpeciesLogevent=nbMainSpeciesLogevent[length(nbMainSpeciesLogevent)]/nbAllSpecies*100 + + + if(DiagFlag) medianPourcentCatchMainSpeciesLogevent=c(100,medianPourcentCatchMainSpeciesLogevent) + + print(Sys.time()-t1) + + # GRAPHICS + + # Number of main species + X11(5,5) + plot(seq(0,100,5),nbMainSpeciesTotal,type='l',col="blue",lwd=3, axes=FALSE, xlab="Threshold (%)",ylab="Number of species") + lines(seq(0,100,5),nbMainSpeciesLogevent,col="green",lwd=3) + if(!is.na(nbMainSpeciesHAC)) segments(0,nbMainSpeciesHAC,100,nbMainSpeciesHAC,col="red",lwd=3) + axis(1) + axis(2, las=2) + box() + legend(20, p*0.9, c( "HAC", "PerTotal", "PerLogevent"),lwd=3,col=c("red", "blue", "green"),bty="n") + savePlot(filename = paste(analysisName,'Number of main species',sep="_"),type ="png") + dev.off() + + X11(5,5) + plot(seq(0,100,5),nbMainSpeciesTotal,type='l',col="blue",lwd=3, axes=FALSE, xlab="Threshold (%)",ylab="Number of species") + lines(seq(0,100,5),nbMainSpeciesLogevent,col="green",lwd=3) + if(!is.na(nbMainSpeciesHAC)) segments(0,nbMainSpeciesHAC,100,nbMainSpeciesHAC,col="red",lwd=3) + axis(1) + axis(2, las=2) + box() + legend(20, p*0.9, c( "HAC", "PerTotal", "PerLogevent"),lwd=3,col=c("red", "blue", "green"),bty="n") + savePlot(filename = paste(analysisName,'Number of main species',sep="_"),type ="png") + dev.off() + + # Black and white version + X11(5,5) + plot(seq(0,100,5),nbMainSpeciesTotal, type='l' 
,lty='dashed', col="black",lwd=3, axes=FALSE, xlab="Threshold (%)",ylab="Number of species") + lines(seq(0,100,5),nbMainSpeciesLogevent, type='l', lty='dotted', col="black",lwd=3) + if(!is.na(nbMainSpeciesHAC)) segments(0,nbMainSpeciesHAC,100,nbMainSpeciesHAC,col="black",lwd=3) + axis(1) + axis(2, las=2) + box() + legend(20, p*0.9, c( "HAC", "PerTotal", "PerLogevent"),lwd=3,col=c("black", "black", "black"),bty="n",lty=c('solid','dashed','dotted'),box.lty = par("lty")) + savePlot(filename = paste(analysisName,'Number of main species_new_2',sep="_"),type ="png") + dev.off() + + # Median percentage of catch represented by main species by logevent + if(DiagFlag){ + png(paste(analysisName,"Median percentage of catch represented by main species by logevent.png",sep="_"), width = 1200, height = 800) + plot(seq(0,100,5),medianPourcentCatchMainSpeciesTotal,type='l',col="blue",lwd=2, main="Median percentage of catch represented by main species by logevent depending of the threshold", xlab="Threshold (%)",ylab="Median percentage of catch represented by main species by logevent") + lines(seq(0,100,5),medianPourcentCatchMainSpeciesLogevent,col="green",lwd=2) + if (RunHAC==TRUE) abline(medianPourcentCatchMainSpeciesHAC,0, col="red",lwd=2) + mtext(paste(p-1," Species"),col='darkblue') + if (RunHAC==TRUE) legend(70, 40, c("HAC", "Total", "Logevent"),lwd=2,col=c("red", "blue", "green")) + if (RunHAC==FALSE) legend(70, 40, c("Total", "Logevent"),lwd=2,col=c("blue", "green")) + dev.off() + } + + listSpecies=sort(unique(c(namesMainSpeciesHAC,namesMainSpeciesTotal,namesMainSpeciesLogevent))) + + # Proportion of the total catch represented by the species in listSpecies (= namesMainSpeciesAll) + catchListSpecies=sumcol[listSpecies] + propCatchListSpecies=sum(catchListSpecies)/sum(sumcol)*100 + + + if(DiagFlag==FALSE) { + explo_species = list(nbAllSpecies=nbAllSpecies, + propNbMainSpeciesHAC=propNbMainSpeciesHAC, + propNbMainSpeciesTotal=propNbMainSpeciesTotal, + 
propNbMainSpeciesLogevent=propNbMainSpeciesLogevent, + nbMainSpeciesHAC=nbMainSpeciesHAC, + nbMainSpeciesTotal=nbMainSpeciesTotal, + nbMainSpeciesLogevent=nbMainSpeciesLogevent, + namesMainSpeciesHAC=sort(namesMainSpeciesHAC), + namesMainSpeciesTotalAlphabetical=sort(namesMainSpeciesTotal), + namesMainSpeciesTotalByImportance=namesMainSpeciesTotal, + namesMainSpeciesLogevent=sort(namesMainSpeciesLogevent), + namesMainSpeciesAll=listSpecies, + propCatchMainSpeciesAll=propCatchListSpecies) + }else{ + explo_species = list(nbAllSpecies=nbAllSpecies, + propNbMainSpeciesHAC=propNbMainSpeciesHAC, + propNbMainSpeciesTotal=propNbMainSpeciesTotal, + propNbMainSpeciesLogevent=propNbMainSpeciesLogevent, + nbMainSpeciesHAC=nbMainSpeciesHAC, + nbMainSpeciesTotal=nbMainSpeciesTotal, + nbMainSpeciesLogevent=nbMainSpeciesLogevent, + namesMainSpeciesHAC=sort(namesMainSpeciesHAC), + namesMainSpeciesTotalAlphabetical=sort(namesMainSpeciesTotal), + namesMainSpeciesTotalByImportance=namesMainSpeciesTotal, + namesMainSpeciesLogevent=sort(namesMainSpeciesLogevent), + namesMainSpeciesAll=listSpecies, + medianPourcentCatchMainSpeciesHAC=median(pourcentCatchMainSpeciesHAC), + medianPourcentCatchMainSpeciesTotal=medianPourcentCatchMainSpeciesTotal, + medianPourcentCatchMainSpeciesLogevent=medianPourcentCatchMainSpeciesLogevent, + propCatchMainSpeciesAll=propCatchListSpecies) + } + + return(explo_species) + +} diff --git a/vmstools/R/select_species.r b/vmstools/R/select_species.r index 229054e..1ab36ef 100644 --- a/vmstools/R/select_species.r +++ b/vmstools/R/select_species.r @@ -1,17 +1,45 @@ -######################################################## -# Remove the cluster with the smallest mean of capture # -######################################################## - -select_species=function(data,groupes_cah){ - nb.classes=length(levels(as.factor(groupes_cah))) - moyennes=numeric(nb.classes) - for(i in 1:nb.classes){ - namegp=names(which(groupes_cah==i)) - effgp=length(which(groupes_cah==i)) - 
moyennes[i]=sum(data[namegp],na.rm=TRUE)/effgp - } - indice.autre=which(moyennes == min(moyennes,na.rm=TRUE)) - noms=names(which(groupes_cah!=indice.autre)) - return(list(noms,indice.autre)) -} - +######################################################## +# Remove the cluster with the smallest mean of capture # +######################################################## + + + +#' Useful functions for the multivariate analysis of logbooks data for +#' identifying metiers. +#' +#' This function contains several functions needed for the multivariate +#' analysis of logbooks data for identifying metiers. +#' +#' +#' @param transformation_proportion Transform quantities to percentage values +#' (between 0 and 100) of each species in the logevent total catch. +#' @param table_variables Transpose the dataset (change variables into +#' individuals) +#' @param scree Implementation of "scree-test" +#' @param select_species Remove the cluster with the smallest mean of capture +#' @param building_tab_pca Build the table with the main species +#' @param test.values Compute the test-value for each species by cluster +#' @param targetspecies Determine the species with a test-value > 1.96 by +#' cluster +#' @param withinVar Calculate the cluster's within-variance +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @references Development of tools for logbook and VMS data analysis. 
Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @export select_species +select_species=function(data,groupes_cah){ + nb.classes=length(levels(as.factor(groupes_cah))) + moyennes=numeric(nb.classes) + for(i in 1:nb.classes){ + namegp=names(which(groupes_cah==i)) + effgp=length(which(groupes_cah==i)) + moyennes[i]=sum(data[namegp],na.rm=TRUE)/effgp + } + indice.autre=which(moyennes == min(moyennes,na.rm=TRUE)) + noms=names(which(groupes_cah!=indice.autre)) + return(list(noms,indice.autre)) +} + diff --git a/vmstools/R/shortenEflalo.r b/vmstools/R/shortenEflalo.r index 1e8bd99..ed5d8bb 100644 --- a/vmstools/R/shortenEflalo.r +++ b/vmstools/R/shortenEflalo.r @@ -1,9 +1,27 @@ -shortenEflalo <- function(data=eflalo2,which.species = c("ANE","BIB","BLL","COD","DAB","HAD","HER","MAC","NEP","PLE","SOL","WHG") ) -{ -#Eflalo is an unwieldy format so this is handy if you want to select a few species -dn <- dimnames(data)[[2]] -yp <- NULL -for(ss in which.species){yp <- c(yp,grep(ss,dn)) } -short.eflalo2 <- data[,c(1:26,yp)] -short.eflalo2 -} \ No newline at end of file +#' Cuts up eflalo according to any particular combination of species to make it +#' more manageable +#' +#' Cuts up eflalo according to any particular combination of species to make it +#' more manageable +#' +#' +#' @param data eflalo formatted data +#' @param which.species array of species names in FAO species names +#' @author Doug Beare +#' @references EU lot 2 project +#' @examples +#' +#' data(eflalo) +#' shortenEflalo(data = eflalo, which.species = c("PLE")) +#' +#' +#' @export shortenEflalo +shortenEflalo <- function(data=eflalo2,which.species = c("ANE","BIB","BLL","COD","DAB","HAD","HER","MAC","NEP","PLE","SOL","WHG") ) +{ +#Eflalo is an unwieldy format so this is handy if you want to select a few species +dn <- dimnames(data)[[2]] +yp <- NULL +for(ss in which.species){yp <- c(yp,grep(ss,dn)) } +short.eflalo2 <- data[,c(1:26,yp)] +short.eflalo2 +} diff --git 
a/vmstools/R/sortTacsat.r b/vmstools/R/sortTacsat.r index 2425304..8aab895 100644 --- a/vmstools/R/sortTacsat.r +++ b/vmstools/R/sortTacsat.r @@ -1,13 +1,34 @@ -`sortTacsat` <- -function(dat){ -require(doBy) - -if(!"SI_DATIM" %in% colnames(dat)) dat$SI_DATIM <- as.POSIXct(paste(dat$SI_DATE, dat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") - - #Sort the tacsat data first by ship, then by date -if("VE_REF" %in% colnames(dat)) dat <- orderBy(~VE_REF+SI_DATIM,data=dat) -if("OB_REF" %in% colnames(dat)) dat <- orderBy(~OB_REF+SI_DATIM,data=dat) - -return(dat)} - - \ No newline at end of file +#' Sorting Tacsat dataframe +#' +#' Sort the Tacsat data first by vessel, then by date, speed and heading. Needs +#' to be in this order to be effectively used in other EU lot 2 project generic +#' functions. +#' +#' +#' @param dat tacsat dataframe +#' @note Uses library(doBy) +#' @author Niels T. Hintzen +#' @seealso \code{\link{filterTacsat}} +#' @references EU lot 2 project +#' @examples +#' +#' data(tacsat) +#' require(doBy) +#' +#' #Sort the Tacsat data +#' tacsat <- sortTacsat(tacsat) +#' +#' @export sortTacsat +`sortTacsat` <- +function(dat){ +require(doBy) + +if(!"SI_DATIM" %in% colnames(dat)) dat$SI_DATIM <- as.POSIXct(paste(dat$SI_DATE, dat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") + + #Sort the tacsat data first by ship, then by date +if("VE_REF" %in% colnames(dat)) dat <- orderBy(~VE_REF+SI_DATIM,data=dat) +if("OB_REF" %in% colnames(dat)) dat <- orderBy(~OB_REF+SI_DATIM,data=dat) + +return(dat)} + + diff --git a/vmstools/R/splitAmongPings.r b/vmstools/R/splitAmongPings.r index 41ceff0..e160046 100644 --- a/vmstools/R/splitAmongPings.r +++ b/vmstools/R/splitAmongPings.r @@ -1,292 +1,353 @@ -splitAmongPings <- function(tacsat,eflalo,variable="all",level="day",conserve=TRUE,by=NULL,returnAll=T){ - - #level: day,ICESrectangle,trip - #conserve: T,F - #variable: kgs,value,effort - require(data.table) - #- Create extra columns with time stamps - 
if(!"FT_REF" %in% colnames(tacsat)) stop("tacsat file needs FT_REF detailing trip number") - if(!"SI_STATE" %in% colnames(tacsat)) stop("tacsat file needs SI_STATE detailing activity of vessel") - if(level == "trip" & conserve == TRUE) stop("conserve catches only at level = ICESrectangle or day") - - if(!"SI_DATIM" %in% colnames(tacsat)) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M") - if(!"LE_CDATIM" %in% colnames(eflalo)) eflalo$LE_CDATIM <- as.POSIXct(eflalo$LE_CDAT, tz="GMT", format="%d/%m/%Y") - - if(is.null(by)==FALSE){ - if(any(is.na(tacsat[,by])) | any(tacsat[,by] == 0)) stop("'by' column in tacsat contains NA or zero's. Cannot execute with NA's or zeros") - } - - #- Levels have hierachy, and need to be suplemented with lower levels - if(level == "day"){ level <- c("day","ICESrectangle","trip") - } else { - if(level == "ICESrectangle"){ level <- c("ICESrectangle","trip") - } else { - if(level == "trip"){ - level <- c("trip") - } - } - } - - #- Add ID to keep track of merged and non-merged sets - tacsat$ID <- 1:nrow(tacsat) - - #- identifyers of eflalo colnames - eflaloCol <- colnames(eflalo) - kgs <- grep("LE_KG",colnames(eflalo)) - eur <- grep("LE_EURO",colnames(eflalo)) - - #- Subset tacsat file - remtacsat <- subset(tacsat,SI_STATE==0) - tacsat <- subset(tacsat,SI_STATE != 0) #only attribute variable to fishing pings - tacsatTrip <- subset(tacsat,FT_REF != 0) - remainTacsat <- sort(unique(tacsatTrip$ID)) - - #- Subset eflalo file - eflalo$ID <- 1:nrow(eflalo) - eflaloTrip <- subset(eflalo, FT_REF %in% sort(unique(tacsatTrip$FT_REF)) & VE_REF %in% sort(unique(tacsatTrip$VE_REF))) - #eflaloNoTrip <- subset(eflalo, !FT_REF %in% sort(unique(tacsatTrip$FT_REF))) - eflaloNoTrip <- eflalo[which(!eflalo$ID %in% eflaloTrip$ID),-match("ID",colnames(eflalo))] - #eflaloVessel <- subset(eflaloNoTrip, VE_REF %in% sort(unique(tacsatTrip$VE_REF))) - #eflaloNoVessel <- subset(eflaloNoTrip,!VE_REF %in% 
sort(unique(tacsatTrip$VE_REF))) - eflaloVessel <- eflaloNoTrip[which(paste(eflaloNoTrip$VE_REF,format(eflaloNoTrip$LE_CDATIM,"%Y")) %in% unique(paste(tacsatTrip$VE_REF,format(tacsatTrip$SI_DATIM,"%Y")))),] - eflaloNoVessel <- eflaloNoTrip[which(!paste(eflaloNoTrip$VE_REF,format(eflaloNoTrip$LE_CDATIM,"%Y")) %in% unique(paste(tacsatTrip$VE_REF,format(tacsatTrip$SI_DATIM,"%Y")))),] - - #------------------------------------------------------------------------------- - # 1a) Merge eflalo to tacsat with matching FT_REF - #------------------------------------------------------------------------------- - - if(dim(tacsatTrip)[1]>0 & dim(eflaloTrip)[1] >0){ - if("day" %in% level){ - print("level: day") - if(!"SI_YEAR" %in% colnames(tacsatTrip)) tacsatTrip$SI_YEAR <- an(format(tacsatTrip$SI_DATIM,format="%Y")) - if(!"SI_DAY" %in% colnames(tacsatTrip)) tacsatTrip$SI_DAY <- an(format(tacsatTrip$SI_DATIM,format="%j")) - if(!"LE_RECT" %in% colnames(tacsatTrip)) tacsatTrip$LE_RECT <- ICESrectangle(tacsatTrip) - - if(!"SI_YEAR" %in% colnames(eflaloTrip)) eflaloTrip$SI_YEAR <- an(format(eflaloTrip$LE_CDATIM,format="%Y")) - if(!"SI_DAY" %in% colnames(eflaloTrip)) eflaloTrip$SI_DAY <- an(format(eflaloTrip$LE_CDATIM,format="%j")) - - #- Count pings in tacsat set - nPings <- countPings(~year+VE_REF+FT_REF+icesrectangle+day,tacsatTrip,by=by) - - #- Do the merging of eflalo to tacsat - res <- eflalo2Pings(eflaloTrip,tacsatTrip,nPings,c("SI_YEAR","VE_REF","FT_REF","LE_RECT","SI_DAY"),eflaloCol[c(kgs,eur)],remainTacsat,by=by) - eflaloTrip <- res[["eflalo"]] - byDayTacsat <- res[["tacsat"]] - remainTacsat <- res[["remainTacsat"]] - } - if("ICESrectangle" %in% level){ - print("level: rectangle") - if(!"SI_YEAR" %in% colnames(tacsatTrip)) tacsatTrip$SI_YEAR <- an(format(tacsatTrip$SI_DATIM,format="%Y")) - if(!"LE_RECT" %in% colnames(tacsatTrip)) tacsatTrip$LE_RECT <- ICESrectangle(tacsatTrip) - - if(!"SI_YEAR" %in% colnames(eflaloTrip)) eflaloTrip$SI_YEAR <- 
an(format(eflaloTrip$LE_CDATIM,format="%Y")) - - #- Count pings in tacsat set - nPings <- countPings(~year+VE_REF+FT_REF+icesrectangle,tacsatTrip,by=by) - - #- Do the merging of eflalo to tacsat - res <- eflalo2Pings(eflaloTrip,tacsatTrip,nPings,c("SI_YEAR","VE_REF","FT_REF","LE_RECT"), eflaloCol[c(kgs,eur)],remainTacsat,by=by) - eflaloTrip <- res[["eflalo"]] - byRectTacsat <- res[["tacsat"]] - remainTacsat <- res[["remainTacsat"]] - } - if("trip" %in% level){ - print("level: trip") - if(!"SI_YEAR" %in% colnames(tacsatTrip)) tacsatTrip$SI_YEAR <- an(format(tacsatTrip$SI_DATIM,format="%Y")) - if(!"SI_YEAR" %in% colnames(eflaloTrip)) eflaloTrip$SI_YEAR <- an(format(eflaloTrip$LE_CDATIM,format="%Y")) - - #- Count pings in tacsat set - nPings <- countPings(~year+VE_REF+FT_REF,tacsatTrip,by=by) - - #- Do the merging of eflalo to tacsat - res <- eflalo2Pings(eflaloTrip,tacsatTrip,nPings,c("SI_YEAR","VE_REF","FT_REF"), eflaloCol[c(kgs,eur)],remainTacsat,by=by) - eflaloTrip <- res[["eflalo"]] - byTripTacsat <- res[["tacsat"]] - remainTacsat <- res[["remainTacsat"]] - } - #------------------------------------------------------------------------------- - # 1b) Bind all tacsat files with matching FT_REF - #------------------------------------------------------------------------------- - - if(length(remainTacsat) > 0) warning("Not all tacsat records with tripnumber have been merged!!") - if(nrow(eflaloTrip) > 0) warning("Not all eflalo records with matching VMS tripnumber have been merged!!") - if("day" %in% level){ tacsatFTREF <- rbind(byDayTacsat,byRectTacsat,byTripTacsat) - } else { - if("ICESrectangle" %in% level){ tacsatFTREF <- rbind(byRectTacsat,byTripTacsat) - } else { tacsatFTREF <- byTripTacsat}} - tacsatFTREF[,kgeur(colnames(tacsatFTREF))] <- sweep(tacsatFTREF[,kgeur(colnames(tacsatFTREF))],1,tacsatFTREF$pings,"/") - tacsatFTREF$ID <- af(ac(tacsatFTREF$ID.x)) - DT <- data.table(tacsatFTREF) - eq1 <- 
c.listquote(paste("sum(",colnames(tacsatFTREF[,kgeur(colnames(tacsatFTREF))]),",na.rm=TRUE)",sep="")) - tacsatFTREF <- DT[,eval(eq1),by=ID.x]; tacsatFTREF <- data.frame(tacsatFTREF); setnames(tacsatFTREF,colnames(tacsatFTREF),c("ID",colnames(eflaloTrip[,kgeur(colnames(eflaloTrip))]))) - } - - #------------------------------------------------------------------------------- - # 2a) Merge eflalo to tacsat with no matching FT_REF - #------------------------------------------------------------------------------- - - #- If you don't want to loose catch or value data, conserve the non-merged - # eflalo catches and distribute these over the tacsat records - if(conserve == TRUE){ - if(dim(tacsat)[1]>0 & dim(eflaloVessel)[1] > 0){ - - #------------------------------------------------------------------------------- - # 2a-1) Merge eflalo to tacsat with matching VE_REF - #------------------------------------------------------------------------------- - - if("day" %in% level){ - print("level: day & conserve = T, by vessel") - if(!"SI_YEAR" %in% colnames(tacsat)) tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y")) - if(!"SI_DAY" %in% colnames(tacsat)) tacsat$SI_DAY <- an(format(tacsat$SI_DATIM,format="%j")) - if(!"LE_RECT" %in% colnames(tacsat)) tacsat$LE_RECT <- ICESrectangle(tacsat) - - if(!"SI_YEAR" %in% colnames(eflaloVessel)) eflaloVessel$SI_YEAR <- an(format(eflaloVessel$LE_CDATIM,format="%Y")) - if(!"SI_DAY" %in% colnames(eflaloVessel)) eflaloVessel$SI_DAY <- an(format(eflaloVessel$LE_CDATIM,format="%j")) - - #- Count pings in tacsat set - nPings <- countPings(~year+VE_REF+icesrectangle+day,tacsat,by=by) - - #- Do the merging of eflalo to tacsat - res <- eflalo2Pings(eflaloVessel,tacsat,nPings,c("SI_YEAR","VE_REF","LE_RECT","SI_DAY"), eflaloCol[c(kgs,eur)],NULL,by=by) - eflaloVessel <- res[["eflalo"]] - byDayTacsat <- res[["tacsat"]] - } - - if("ICESrectangle" %in% level){ - print("level: rectangle & conserve = T, by vessel") - if(!"SI_YEAR" %in% colnames(tacsat)) 
tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y")) - if(!"LE_RECT" %in% colnames(tacsat)) tacsat$LE_RECT <- ICESrectangle(tacsat) - - if(!"SI_YEAR" %in% colnames(eflaloVessel)) eflaloVessel$SI_YEAR <- an(format(eflaloVessel$LE_CDATIM,format="%Y")) - - #- Count pings in tacsat set - nPings <- countPings(~year+VE_REF+icesrectangle,tacsat,by=by) - - #- Do the merging of eflalo to tacsat - res <- eflalo2Pings(eflaloVessel,tacsat,nPings,c("SI_YEAR","VE_REF","LE_RECT"), eflaloCol[c(kgs,eur)],NULL,by=by) - eflaloVessel <- res[["eflalo"]] - byRectTacsat <- res[["tacsat"]] - } - if(TRUE){ #-For remainder of vessel merging not at ICESrectangle level - print("level: year & conserve = T, by vessel") - if(!"SI_YEAR" %in% colnames(tacsat)) tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y")) - if(!"SI_YEAR" %in% colnames(eflaloVessel)) eflaloVessel$SI_YEAR <- an(format(eflaloVessel$LE_CDATIM,format="%Y")) - - #- Count pings in tacsat set - nPings <- countPings(~year+VE_REF,tacsat,by=by) - - #- Do the merging of eflalo to tacsat - res <- eflalo2Pings(eflaloVessel,tacsat,nPings,c("SI_YEAR","VE_REF" ), eflaloCol[c(kgs,eur)],NULL,by=by) - eflaloVessel <- res[["eflalo"]] - byVessTacsat <- res[["tacsat"]] - } - - #------------------------------------------------------------------------------- - # 2b-1) Bind all tacsat files with matching VE_REF - #------------------------------------------------------------------------------- - - if("day" %in% level){ tacsatVEREF <- rbind(byDayTacsat,byRectTacsat,byVessTacsat) - } else { - if("ICESrectangle" %in% level){ - tacsatVEREF <- rbind(byRectTacsat,byVessTacsat) - } else { tacsatVEREF <- byVessTacsat}} - tacsatVEREF[,kgeur(colnames(tacsatVEREF))] <- sweep(tacsatVEREF[,kgeur(colnames(tacsatVEREF))],1,tacsatVEREF$pings,"/") - tacsatVEREF$ID <- af(ac(tacsatVEREF$ID.x)) - DT <- data.table(tacsatVEREF) - eq1 <- c.listquote(paste("sum(",colnames(tacsatVEREF[,kgeur(colnames(tacsatVEREF))]),",na.rm=TRUE)",sep="")) - tacsatVEREF <- 
DT[,eval(eq1),by=ID.x]; tacsatVEREF <- data.frame(tacsatVEREF); setnames(tacsatVEREF,colnames(tacsatVEREF),c("ID",colnames(eflaloVessel[,kgeur(colnames(eflaloVessel))]))) - } - - if(dim(tacsat)[1] > 0 & dim(eflaloNoVessel)[1] > 0){ - #------------------------------------------------------------------------------- - # 2a-2) Merge eflalo to tacsat with no matching FT_REF or VE_REF - #------------------------------------------------------------------------------- - if("day" %in% level){ - print("level: day & conserve = T, no vessel match") - if(!"SI_YEAR" %in% colnames(tacsat)) tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y")) - if(!"SI_DAY" %in% colnames(tacsat)) tacsat$SI_DAY <- an(format(tacsat$SI_DATIM,format="%j")) - if(!"LE_RECT" %in% colnames(tacsat)) tacsat$LE_RECT <- ICESrectangle(tacsat) - - if(!"SI_YEAR" %in% colnames(eflaloNoVessel)) eflaloNoVessel$SI_YEAR <- an(format(eflaloNoVessel$LE_CDATIM,format="%Y")) - if(!"SI_DAY" %in% colnames(eflaloNoVessel)) eflaloNoVessel$SI_DAY <- an(format(eflaloNoVessel$LE_CDATIM,format="%j")) - - #- Count pings in tacsat set - nPings <- countPings(~year+icesrectangle+day,tacsat,by=by) - - #- Do the merging of eflalo to tacsat - res <- eflalo2Pings(eflaloNoVessel,tacsat,nPings,c("SI_YEAR","LE_RECT","SI_DAY"), eflaloCol[c(kgs,eur)],NULL,by=by) - eflaloNoVessel <- res[["eflalo"]] - byDayTacsat <- res[["tacsat"]] - } - - if("ICESrectangle" %in% level){ - print("level: rectangle & conserve = T, no vessel match") - if(!"SI_YEAR" %in% colnames(tacsat)) tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y")) - if(!"LE_RECT" %in% colnames(tacsat)) tacsat$LE_RECT <- ICESrectangle(tacsat) - - if(!"SI_YEAR" %in% colnames(eflaloNoVessel)) eflaloNoVessel$SI_YEAR <- an(format(eflaloNoVessel$LE_CDATIM,format="%Y")) - - #- Count pings in tacsat set - nPings <- countPings(~year+icesrectangle,tacsat,by=by) - - #- Do the merging of eflalo to tacsat - res <- eflalo2Pings(eflaloNoVessel,tacsat,nPings,c("SI_YEAR","LE_RECT"), 
eflaloCol[c(kgs,eur)],NULL,by=by) - eflaloNoVessel <- res[["eflalo"]] - byRectTacsat <- res[["tacsat"]] - } - if(TRUE){ #-For remainder of merging not at ICESrectangle level - print("level: year & conserve = T, no vessel match") - if(!"SI_YEAR" %in% colnames(tacsat)) tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y")) - if(!"SI_YEAR" %in% colnames(eflaloNoVessel)) eflaloNoVessel$SI_YEAR <- an(format(eflaloNoVessel$LE_CDATIM,format="%Y")) - - #- Count pings in tacsat set - nPings <- countPings(~year,tacsat,by=by) - - #- Do the merging of eflalo to tacsat - res <- eflalo2Pings(eflaloNoVessel,tacsat,nPings,c("SI_YEAR"), eflaloCol[c(kgs,eur)],NULL,by=by) - eflaloNoVessel <- res[["eflalo"]] - byVessTacsat <- res[["tacsat"]] - } - #------------------------------------------------------------------------------- - # 2b-2) Bind all tacsat files with no matching FT_REF or VE_REF - #------------------------------------------------------------------------------- - - if("day" %in% level){ tacsatREF <- rbind(byDayTacsat,byRectTacsat,byVessTacsat) - } else { - if("ICESrectangle" %in% level){ tacsatREF <- rbind(byRectTacsat,byVessTacsat) - } else { tacsatREF <- byVessTacsat}} - tacsatREF[,kgeur(colnames(tacsatREF))] <- sweep(tacsatREF[,kgeur(colnames(tacsatREF))],1,tacsatREF$pings,"/") - tacsatREF$ID <- af(ac(tacsatREF$ID.x)) - DT <- data.table(tacsatREF) - eq1 <- c.listquote(paste("sum(",colnames(tacsatREF[,kgeur(colnames(tacsatREF))]),",na.rm=TRUE)",sep="")) - tacsatREF <- DT[,eval(eq1),by=ID.x]; tacsatREF <- data.frame(tacsatREF); setnames(tacsatREF,colnames(tacsatREF),c("ID",colnames(eflaloVessel[,kgeur(colnames(eflaloVessel))]))) - } - }#End conserve - - #------------------------------------------------------------------------------- - # 3) Merge all tacsat files together and return - #------------------------------------------------------------------------------- - - if(conserve==TRUE){ - if(exists("tacsatFTREF")){one <- tacsatFTREF} else{ one <- numeric()} - 
#' Split values or landings from eflalo over tacsat pings
#'
#' Split the values or landings as listed in the eflalo file over the tacsat
#' pings, while taking different levels into account such as by day,
#' ICESrectangle or by trip number. Also there is a possibility to merge the
#' eflalo records without a matching tacsat trip.
#'
#' Levels have hierarchy, so if "day" is specified, also "ICESrectangle" and
#' "trip" will be used. If "ICESrectangle" is specified also "trip" will be
#' used. "Trip" can be used on its own. Same hierarchy applies to merging when
#' conserve = TRUE (except for trip level).
#'
#' Note that the tacsat file needs a column SI_STATE which has 0 for
#' non-fishing and 1 for fishing records.
#'
#' @param tacsat Tacsat object
#' @param eflalo Eflalo object
#' @param variable Indicating what to split: "all","value","kgs"
#' @param level Levels can be: "day", "ICESrectangle", "trip"
#' @param conserve Logical, if kgs or value needs to be conserved if merging by
#'   trip number is not possible (default = TRUE)
#' @param by Name of tacsat column by which KG and EURO should be dispatched.
#'   Default to NULL which distributes KG and EURO equally by each ping. A
#'   tacsat column can be used instead to generate a 'weighted' dispatch of KG
#'   and EURO.
#' @param returnAll Logical, whether all non-fishing pings should be returned
#'   as well (default = TRUE)
#' @return Merged tacsat file will be returned including the split values
#'   over the tacsat pings where SI_STATE is not zero.
#' @author Niels T. Hintzen, Francois Bastardie
#' @seealso \code{\link{mergeEflalo2Tacsat}}, \code{\link{mergeEflalo2Pings}}
#' @references EU Lot 2 project
#' @examples
#'
#' data(tacsat); tacsat <- tacsat[1:1000,]
#' data(eflalo); eflalo <- eflalo[1:1000,]
#'
#' tacsatp <- mergeEflalo2Tacsat(eflalo,tacsat)
#'
#' #- Create a column named SI_STATE which holds values 0 or 1 which denotes no
#' #  fishing & fishing.
#' tacsatp$IDX <- 1:nrow(tacsatp)
#' tacsatFilter <- filterTacsat(tacsatp,st=c(1,6),hd=NULL,remDup=TRUE)
#' tacsatp$SI_STATE <- 0
#' tacsatp$SI_STATE[tacsatFilter$IDX] <- 1
#'
#' #-Add interval to tacsatp
#' tacsatp <- intervalTacsat(tacsatp,level="trip",fill.na=TRUE)
#'
#' tacsatp <- subset(tacsatp,SI_STATE == 1)
#'
#' tacsatEflalo <- splitAmongPings(tacsat=tacsatp,eflalo=eflalo,
#'                   variable="all",level="day",conserve=TRUE)
#'
#' #- When using the 'by' statement, make sure the by column does not contain NA,
#' #  or zeros
#' tacsatp <- subset(tacsatp,!is.na(tacsatp$INTV) | tacsatp$INTV != 0)
#' tacsatEflalo <- splitAmongPings(tacsat=tacsatp,eflalo=eflalo,
#'                   variable="all",level="day",conserve=TRUE,by="INTV")
#'
#' @export splitAmongPings
splitAmongPings <- function(tacsat,eflalo,variable="all",level="day",conserve=TRUE,by=NULL,returnAll=T){

  #level: day,ICESrectangle,trip
  #conserve: T,F
  #variable: kgs,value,effort
  require(data.table)

  #- Sanity checks: trip number and activity state columns are mandatory
  if(!"FT_REF" %in% colnames(tacsat)) stop("tacsat file needs FT_REF detailing trip number")
  if(!"SI_STATE" %in% colnames(tacsat)) stop("tacsat file needs SI_STATE detailing activity of vessel")
  if(level == "trip" & conserve == TRUE) stop("conserve catches only at level = ICESrectangle or day")

  #- Create extra columns with time stamps (ping time in tacsat, catch date in eflalo)
  if(!"SI_DATIM" %in% colnames(tacsat)) tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME, sep=" "), tz="GMT", format="%d/%m/%Y %H:%M")
  if(!"LE_CDATIM" %in% colnames(eflalo)) eflalo$LE_CDATIM <- as.POSIXct(eflalo$LE_CDAT, tz="GMT", format="%d/%m/%Y")

  #- Weighted dispatch is undefined for NA / zero weights, so refuse them up front
  if(is.null(by)==FALSE){
    if(any(is.na(tacsat[,by])) | any(tacsat[,by] == 0)) stop("'by' column in tacsat contains NA or zero's. Cannot execute with NA's or zeros")
  }

  #- Levels have hierarchy, and need to be supplemented with lower levels
  if(level == "day"){ level <- c("day","ICESrectangle","trip")
  } else {
      if(level == "ICESrectangle"){ level <- c("ICESrectangle","trip")
      } else {
          if(level == "trip"){
            level <- c("trip")
          }
        }
    }

  #- Add ID to keep track of merged and non-merged sets
  tacsat$ID <- 1:nrow(tacsat)

  #- Identifiers (column indices) of the eflalo landings / value columns
  eflaloCol <- colnames(eflalo)
  kgs       <- grep("LE_KG",colnames(eflalo))
  eur       <- grep("LE_EURO",colnames(eflalo))

  #- Subset tacsat file: keep fishing pings only, and split off pings with a trip number
  remtacsat   <- subset(tacsat,SI_STATE==0)
  tacsat      <- subset(tacsat,SI_STATE != 0) #only attribute variable to fishing pings
  tacsatTrip  <- subset(tacsat,FT_REF != 0)
  remainTacsat <- sort(unique(tacsatTrip$ID))

  #- Subset eflalo file into: trips matching tacsat (eflaloTrip), same
  #  vessel-year but no trip match (eflaloVessel), and no vessel-year match
  #  at all (eflaloNoVessel)
  eflalo$ID  <- 1:nrow(eflalo)
  eflaloTrip <- subset(eflalo, FT_REF %in% sort(unique(tacsatTrip$FT_REF)) & VE_REF %in% sort(unique(tacsatTrip$VE_REF)))
  #eflaloNoTrip <- subset(eflalo, !FT_REF %in% sort(unique(tacsatTrip$FT_REF)))
  eflaloNoTrip <- eflalo[which(!eflalo$ID %in% eflaloTrip$ID),-match("ID",colnames(eflalo))]
  #eflaloVessel <- subset(eflaloNoTrip, VE_REF %in% sort(unique(tacsatTrip$VE_REF)))
  #eflaloNoVessel <- subset(eflaloNoTrip,!VE_REF %in% sort(unique(tacsatTrip$VE_REF)))
  eflaloVessel   <- eflaloNoTrip[which(paste(eflaloNoTrip$VE_REF,format(eflaloNoTrip$LE_CDATIM,"%Y")) %in% unique(paste(tacsatTrip$VE_REF,format(tacsatTrip$SI_DATIM,"%Y")))),]
  eflaloNoVessel <- eflaloNoTrip[which(!paste(eflaloNoTrip$VE_REF,format(eflaloNoTrip$LE_CDATIM,"%Y")) %in% unique(paste(tacsatTrip$VE_REF,format(tacsatTrip$SI_DATIM,"%Y")))),]

  #-------------------------------------------------------------------------------
  # 1a) Merge eflalo to tacsat with matching FT_REF
  #-------------------------------------------------------------------------------

  if(dim(tacsatTrip)[1]>0 & dim(eflaloTrip)[1] >0){
    if("day" %in% level){
      print("level: day")
      if(!"SI_YEAR" %in% colnames(tacsatTrip)) tacsatTrip$SI_YEAR <- an(format(tacsatTrip$SI_DATIM,format="%Y"))
      if(!"SI_DAY"  %in% colnames(tacsatTrip)) tacsatTrip$SI_DAY  <- an(format(tacsatTrip$SI_DATIM,format="%j"))
      if(!"LE_RECT" %in% colnames(tacsatTrip)) tacsatTrip$LE_RECT <- ICESrectangle(tacsatTrip)

      if(!"SI_YEAR" %in% colnames(eflaloTrip)) eflaloTrip$SI_YEAR <- an(format(eflaloTrip$LE_CDATIM,format="%Y"))
      if(!"SI_DAY"  %in% colnames(eflaloTrip)) eflaloTrip$SI_DAY  <- an(format(eflaloTrip$LE_CDATIM,format="%j"))

      #- Count pings in tacsat set
      nPings <- countPings(~year+VE_REF+FT_REF+icesrectangle+day,tacsatTrip,by=by)

      #- Do the merging of eflalo to tacsat
      res <- eflalo2Pings(eflaloTrip,tacsatTrip,nPings,c("SI_YEAR","VE_REF","FT_REF","LE_RECT","SI_DAY"),eflaloCol[c(kgs,eur)],remainTacsat,by=by)
      eflaloTrip   <- res[["eflalo"]]
      byDayTacsat  <- res[["tacsat"]]
      remainTacsat <- res[["remainTacsat"]]
    }
    if("ICESrectangle" %in% level){
      print("level: rectangle")
      if(!"SI_YEAR" %in% colnames(tacsatTrip)) tacsatTrip$SI_YEAR <- an(format(tacsatTrip$SI_DATIM,format="%Y"))
      if(!"LE_RECT" %in% colnames(tacsatTrip)) tacsatTrip$LE_RECT <- ICESrectangle(tacsatTrip)

      if(!"SI_YEAR" %in% colnames(eflaloTrip)) eflaloTrip$SI_YEAR <- an(format(eflaloTrip$LE_CDATIM,format="%Y"))

      #- Count pings in tacsat set
      nPings <- countPings(~year+VE_REF+FT_REF+icesrectangle,tacsatTrip,by=by)

      #- Do the merging of eflalo to tacsat
      res <- eflalo2Pings(eflaloTrip,tacsatTrip,nPings,c("SI_YEAR","VE_REF","FT_REF","LE_RECT"), eflaloCol[c(kgs,eur)],remainTacsat,by=by)
      eflaloTrip   <- res[["eflalo"]]
      byRectTacsat <- res[["tacsat"]]
      remainTacsat <- res[["remainTacsat"]]
    }
    if("trip" %in% level){
      print("level: trip")
      if(!"SI_YEAR" %in% colnames(tacsatTrip)) tacsatTrip$SI_YEAR <- an(format(tacsatTrip$SI_DATIM,format="%Y"))
      if(!"SI_YEAR" %in% colnames(eflaloTrip)) eflaloTrip$SI_YEAR <- an(format(eflaloTrip$LE_CDATIM,format="%Y"))

      #- Count pings in tacsat set
      nPings <- countPings(~year+VE_REF+FT_REF,tacsatTrip,by=by)

      #- Do the merging of eflalo to tacsat
      res <- eflalo2Pings(eflaloTrip,tacsatTrip,nPings,c("SI_YEAR","VE_REF","FT_REF"), eflaloCol[c(kgs,eur)],remainTacsat,by=by)
      eflaloTrip   <- res[["eflalo"]]
      byTripTacsat <- res[["tacsat"]]
      remainTacsat <- res[["remainTacsat"]]
    }
    #-------------------------------------------------------------------------------
    # 1b) Bind all tacsat files with matching FT_REF
    #-------------------------------------------------------------------------------

    if(length(remainTacsat) > 0) warning("Not all tacsat records with tripnumber have been merged!!")
    if(nrow(eflaloTrip) > 0) warning("Not all eflalo records with matching VMS tripnumber have been merged!!")
    if("day" %in% level){ tacsatFTREF <- rbind(byDayTacsat,byRectTacsat,byTripTacsat)
    } else {
        if("ICESrectangle" %in% level){ tacsatFTREF <- rbind(byRectTacsat,byTripTacsat)
        } else { tacsatFTREF <- byTripTacsat}}
    #- Each record carries a trip total; divide by number of pings, then
    #  aggregate the shares back per original tacsat record (ID.x)
    tacsatFTREF[,kgeur(colnames(tacsatFTREF))] <- sweep(tacsatFTREF[,kgeur(colnames(tacsatFTREF))],1,tacsatFTREF$pings,"/")
    tacsatFTREF$ID <- af(ac(tacsatFTREF$ID.x))
    DT  <- data.table(tacsatFTREF)
    eq1 <- c.listquote(paste("sum(",colnames(tacsatFTREF[,kgeur(colnames(tacsatFTREF))]),",na.rm=TRUE)",sep=""))
    tacsatFTREF <- DT[,eval(eq1),by=ID.x]; tacsatFTREF <- data.frame(tacsatFTREF); setnames(tacsatFTREF,colnames(tacsatFTREF),c("ID",colnames(eflaloTrip[,kgeur(colnames(eflaloTrip))])))
  }

  #-------------------------------------------------------------------------------
  # 2a) Merge eflalo to tacsat with no matching FT_REF
  #-------------------------------------------------------------------------------

  #- If you don't want to loose catch or value data, conserve the non-merged
  #  eflalo catches and distribute these over the tacsat records
  if(conserve == TRUE){
    if(dim(tacsat)[1]>0 & dim(eflaloVessel)[1] > 0){

      #-------------------------------------------------------------------------------
      # 2a-1) Merge eflalo to tacsat with matching VE_REF
      #-------------------------------------------------------------------------------

      if("day" %in% level){
        print("level: day & conserve = T, by vessel")
        if(!"SI_YEAR" %in% colnames(tacsat)) tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y"))
        if(!"SI_DAY"  %in% colnames(tacsat)) tacsat$SI_DAY  <- an(format(tacsat$SI_DATIM,format="%j"))
        if(!"LE_RECT" %in% colnames(tacsat)) tacsat$LE_RECT <- ICESrectangle(tacsat)

        if(!"SI_YEAR" %in% colnames(eflaloVessel)) eflaloVessel$SI_YEAR <- an(format(eflaloVessel$LE_CDATIM,format="%Y"))
        if(!"SI_DAY"  %in% colnames(eflaloVessel)) eflaloVessel$SI_DAY  <- an(format(eflaloVessel$LE_CDATIM,format="%j"))

        #- Count pings in tacsat set
        nPings <- countPings(~year+VE_REF+icesrectangle+day,tacsat,by=by)

        #- Do the merging of eflalo to tacsat
        res <- eflalo2Pings(eflaloVessel,tacsat,nPings,c("SI_YEAR","VE_REF","LE_RECT","SI_DAY"), eflaloCol[c(kgs,eur)],NULL,by=by)
        eflaloVessel <- res[["eflalo"]]
        byDayTacsat  <- res[["tacsat"]]
      }

      if("ICESrectangle" %in% level){
        print("level: rectangle & conserve = T, by vessel")
        if(!"SI_YEAR" %in% colnames(tacsat)) tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y"))
        if(!"LE_RECT" %in% colnames(tacsat)) tacsat$LE_RECT <- ICESrectangle(tacsat)

        if(!"SI_YEAR" %in% colnames(eflaloVessel)) eflaloVessel$SI_YEAR <- an(format(eflaloVessel$LE_CDATIM,format="%Y"))

        #- Count pings in tacsat set
        nPings <- countPings(~year+VE_REF+icesrectangle,tacsat,by=by)

        #- Do the merging of eflalo to tacsat
        res <- eflalo2Pings(eflaloVessel,tacsat,nPings,c("SI_YEAR","VE_REF","LE_RECT"), eflaloCol[c(kgs,eur)],NULL,by=by)
        eflaloVessel <- res[["eflalo"]]
        byRectTacsat <- res[["tacsat"]]
      }
      if(TRUE){ #-For remainder of vessel merging not at ICESrectangle level
        print("level: year & conserve = T, by vessel")
        if(!"SI_YEAR" %in% colnames(tacsat)) tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y"))
        if(!"SI_YEAR" %in% colnames(eflaloVessel)) eflaloVessel$SI_YEAR <- an(format(eflaloVessel$LE_CDATIM,format="%Y"))

        #- Count pings in tacsat set
        nPings <- countPings(~year+VE_REF,tacsat,by=by)

        #- Do the merging of eflalo to tacsat
        res <- eflalo2Pings(eflaloVessel,tacsat,nPings,c("SI_YEAR","VE_REF" ), eflaloCol[c(kgs,eur)],NULL,by=by)
        eflaloVessel <- res[["eflalo"]]
        byVessTacsat <- res[["tacsat"]]
      }

      #-------------------------------------------------------------------------------
      # 2b-1) Bind all tacsat files with matching VE_REF
      #-------------------------------------------------------------------------------

      if("day" %in% level){ tacsatVEREF <- rbind(byDayTacsat,byRectTacsat,byVessTacsat)
      } else {
          if("ICESrectangle" %in% level){
            tacsatVEREF <- rbind(byRectTacsat,byVessTacsat)
          } else { tacsatVEREF <- byVessTacsat}}
      tacsatVEREF[,kgeur(colnames(tacsatVEREF))] <- sweep(tacsatVEREF[,kgeur(colnames(tacsatVEREF))],1,tacsatVEREF$pings,"/")
      tacsatVEREF$ID <- af(ac(tacsatVEREF$ID.x))
      DT  <- data.table(tacsatVEREF)
      eq1 <- c.listquote(paste("sum(",colnames(tacsatVEREF[,kgeur(colnames(tacsatVEREF))]),",na.rm=TRUE)",sep=""))
      tacsatVEREF <- DT[,eval(eq1),by=ID.x]; tacsatVEREF <- data.frame(tacsatVEREF); setnames(tacsatVEREF,colnames(tacsatVEREF),c("ID",colnames(eflaloVessel[,kgeur(colnames(eflaloVessel))])))
    }

    if(dim(tacsat)[1] > 0 & dim(eflaloNoVessel)[1] > 0){
      #-------------------------------------------------------------------------------
      # 2a-2) Merge eflalo to tacsat with no matching FT_REF or VE_REF
      #-------------------------------------------------------------------------------
      if("day" %in% level){
        print("level: day & conserve = T, no vessel match")
        if(!"SI_YEAR" %in% colnames(tacsat)) tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y"))
        if(!"SI_DAY"  %in% colnames(tacsat)) tacsat$SI_DAY  <- an(format(tacsat$SI_DATIM,format="%j"))
        if(!"LE_RECT" %in% colnames(tacsat)) tacsat$LE_RECT <- ICESrectangle(tacsat)

        if(!"SI_YEAR" %in% colnames(eflaloNoVessel)) eflaloNoVessel$SI_YEAR <- an(format(eflaloNoVessel$LE_CDATIM,format="%Y"))
        if(!"SI_DAY"  %in% colnames(eflaloNoVessel)) eflaloNoVessel$SI_DAY  <- an(format(eflaloNoVessel$LE_CDATIM,format="%j"))

        #- Count pings in tacsat set
        nPings <- countPings(~year+icesrectangle+day,tacsat,by=by)

        #- Do the merging of eflalo to tacsat
        res <- eflalo2Pings(eflaloNoVessel,tacsat,nPings,c("SI_YEAR","LE_RECT","SI_DAY"), eflaloCol[c(kgs,eur)],NULL,by=by)
        eflaloNoVessel <- res[["eflalo"]]
        byDayTacsat    <- res[["tacsat"]]
      }

      if("ICESrectangle" %in% level){
        print("level: rectangle & conserve = T, no vessel match")
        if(!"SI_YEAR" %in% colnames(tacsat)) tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y"))
        if(!"LE_RECT" %in% colnames(tacsat)) tacsat$LE_RECT <- ICESrectangle(tacsat)

        if(!"SI_YEAR" %in% colnames(eflaloNoVessel)) eflaloNoVessel$SI_YEAR <- an(format(eflaloNoVessel$LE_CDATIM,format="%Y"))

        #- Count pings in tacsat set
        nPings <- countPings(~year+icesrectangle,tacsat,by=by)

        #- Do the merging of eflalo to tacsat
        res <- eflalo2Pings(eflaloNoVessel,tacsat,nPings,c("SI_YEAR","LE_RECT"), eflaloCol[c(kgs,eur)],NULL,by=by)
        eflaloNoVessel <- res[["eflalo"]]
        byRectTacsat   <- res[["tacsat"]]
      }
      if(TRUE){ #-For remainder of merging not at ICESrectangle level
        print("level: year & conserve = T, no vessel match")
        if(!"SI_YEAR" %in% colnames(tacsat)) tacsat$SI_YEAR <- an(format(tacsat$SI_DATIM,format="%Y"))
        if(!"SI_YEAR" %in% colnames(eflaloNoVessel)) eflaloNoVessel$SI_YEAR <- an(format(eflaloNoVessel$LE_CDATIM,format="%Y"))

        #- Count pings in tacsat set
        nPings <- countPings(~year,tacsat,by=by)

        #- Do the merging of eflalo to tacsat
        res <- eflalo2Pings(eflaloNoVessel,tacsat,nPings,c("SI_YEAR"), eflaloCol[c(kgs,eur)],NULL,by=by)
        eflaloNoVessel <- res[["eflalo"]]
        byVessTacsat   <- res[["tacsat"]]
      }
      #-------------------------------------------------------------------------------
      # 2b-2) Bind all tacsat files with no matching FT_REF or VE_REF
      #-------------------------------------------------------------------------------

      if("day" %in% level){ tacsatREF <- rbind(byDayTacsat,byRectTacsat,byVessTacsat)
      } else {
          if("ICESrectangle" %in% level){ tacsatREF <- rbind(byRectTacsat,byVessTacsat)
          } else { tacsatREF <- byVessTacsat}}
      tacsatREF[,kgeur(colnames(tacsatREF))] <- sweep(tacsatREF[,kgeur(colnames(tacsatREF))],1,tacsatREF$pings,"/")
      tacsatREF$ID <- af(ac(tacsatREF$ID.x))
      DT  <- data.table(tacsatREF)
      eq1 <- c.listquote(paste("sum(",colnames(tacsatREF[,kgeur(colnames(tacsatREF))]),",na.rm=TRUE)",sep=""))
      #- NOTE(review): the replacement names below are taken from eflaloVessel,
      #  not eflaloNoVessel; the kg/euro column names of both derive from the
      #  same eflalo so this appears harmless, but confirm it is intentional.
      tacsatREF <- DT[,eval(eq1),by=ID.x]; tacsatREF <- data.frame(tacsatREF); setnames(tacsatREF,colnames(tacsatREF),c("ID",colnames(eflaloVessel[,kgeur(colnames(eflaloVessel))])))
    }
  }#End conserve

  #-------------------------------------------------------------------------------
  # 3) Merge all tacsat files together and return
  #-------------------------------------------------------------------------------

  if(conserve==TRUE){
    #- Any of the three partial results may not exist (empty subsets above)
    if(exists("tacsatFTREF")){one   <- tacsatFTREF} else{ one   <- numeric()}
    if(exists("tacsatVEREF")){two   <- tacsatVEREF} else{ two   <- numeric()}
    if(exists("tacsatREF"))  {three <- tacsatREF}   else{ three <- numeric()}
    tacsatTot <- rbind(one,two,three)
    DT  <- data.table(tacsatTot)
    eq1 <- c.listquote(paste("sum(",colnames(tacsatTot[,kgeur(colnames(tacsatTot))]),",na.rm=TRUE)",sep=""))
    tacsatTot <- DT[,eval(eq1),by=ID]; tacsatTot <- data.frame(tacsatTot); setnames(tacsatTot,colnames(tacsatTot),c("ID",colnames(eflalo[,kgeur(colnames(eflalo))])))
    tacsatReturn <- merge(tacsat,tacsatTot,by="ID",all.x=TRUE)
    if(variable == "value") tacsatReturn <- tacsatReturn[,c(1:dim(tacsat)[2],grep("EURO",colnames(tacsatReturn)))]
    if(variable == "kgs")   tacsatReturn <- tacsatReturn[,c(1:dim(tacsat)[2],grep("KG",colnames(tacsatReturn)))]
    if(variable == "all")   tacsatReturn <- tacsatReturn
  } else {
    if(exists("tacsatFTREF")==FALSE){stop("You have selected not to conserve catches, but there is no trip identifier in the tacsat file")}
    tacsatReturn <- merge(tacsat,tacsatFTREF,by="ID",all.x=TRUE)
    if(variable == "value") tacsatReturn <- tacsatReturn[,c(1:dim(tacsat)[2],grep("EURO",colnames(tacsatReturn)))]
    if(variable == "kgs")   tacsatReturn <- tacsatReturn[,c(1:dim(tacsat)[2],grep("KG",colnames(tacsatReturn)))]
    if(variable == "all")   tacsatReturn <- tacsatReturn
  }
  #- Optionally put the non-fishing pings back before returning
  if(returnAll & nrow(remtacsat)>0)
    tacsatReturn <- orderBy(~ID,data=rbindTacsat(tacsatReturn,remtacsat))

  return(orderBy(~ID,data=tacsatReturn)[,-match("ID",colnames(tacsatReturn))])}
#' Summarize primary aspects of tacsat file
#'
#' Gives a quick overview of a tacsat dataset: number of countries and
#' vessels, spatial extent, temporal extent, heading and speed ranges and the
#' total effort in hours.
#'
#' @param tacsat tacsat dataframe
#' @author Niels T. Hintzen
#' @seealso \code{\link{summarizeEflalo}},\code{\link{summary}}
#' @examples
#'
#' data(tacsat)
#'
#' summarizeTacsat(tacsat)
#'
#' @export summarizeTacsat
summarizeTacsat <- function(tacsat){

  countryCount <- length(unique(tacsat$VE_COU))
  vesselCount  <- length(unique(tacsat$VE_REF))

  # Ensure a POSIXct time stamp column is available
  if (!"SI_DATIM" %in% colnames(tacsat)) {
    tacsat$SI_DATIM <- as.POSIXct(paste(tacsat$SI_DATE, tacsat$SI_TIME,
                                  sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M")
  }

  # Effort per trip when trip identifiers are present, else per vessel
  intvLevel   <- if ("FT_REF" %in% colnames(tacsat)) "trip" else "vessel"
  effortHours <- sum(intervalTacsat(tacsat, level = intvLevel, fill.na = TRUE)$INTV, na.rm = TRUE) / 60

  lonRange  <- round(range(tacsat$SI_LONG, na.rm = TRUE), 3)
  latRange  <- round(range(tacsat$SI_LATI, na.rm = TRUE), 3)
  timeRange <- range(tacsat$SI_DATIM, na.rm = TRUE)
  headRange <- range(tacsat$SI_HE, na.rm = TRUE)
  spdRange  <- range(tacsat$SI_SP, na.rm = TRUE)

  summaryTab <- cbind(
    desc  = c("nrCountries","nrVessels","minLon","maxLon","minLat","maxLat",
              "minTime","maxTime","minHeading","maxHeading","minSpeed","maxSpeed","effort(hr)"),
    value = c(countryCount, vesselCount, lonRange, latRange,
              ac(timeRange[1]), ac(timeRange[2]),
              headRange, spdRange, round(effortHours, 1)))
  return(as.data.frame(summaryTab, stringsAsFactors = FALSE))}
Hintzen +#' @seealso \code{\link{summarizeTacsat}},\code{\link{summary}} +#' @examples +#' +#' data(eflalo) +#' +#' summarizeEflalo(eflalo) +#' +#' @export summarizeEflalo +summarizeEflalo <- function(eflalo){ + + nrVessels <- length(unique(eflalo$VE_REF)) + nrCountries <- length(unique(eflalo$VE_COU)) + + if (!"FT_DDATIM" %in% colnames(eflalo)) + eflalo$FT_DDATIM <- as.POSIXct(paste(eflalo$FT_DDAT, eflalo$FT_DTIME, + sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M") + if (!"FT_LDATIM" %in% colnames(eflalo)) + eflalo$FT_LDATIM <- as.POSIXct(paste(eflalo$FT_LDAT, eflalo$FT_LTIME, + sep = " "), tz = "GMT", format = "%d/%m/%Y %H:%M") + + totalEffort <- sum(difftime(eflalo$FT_LDATIM,eflalo$FT_DDATIM,units="hours"),na.rm=TRUE) + totalKW <- sum(eflalo$VE_KW,na.rm=TRUE) + meanLength <- mean(eflalo$VE_LEN,na.rm=TRUE) + gears <- names(rev(table(eflalo$LE_GEAR)))[1:3] + catchvals <- colSums(eflalo[,kgeur(colnames(eflalo))],na.rm=TRUE) + catches <- catchvals[grep("LE_KG_",names(catchvals))] + values <- catchvals[grep("LE_EURO_",names(catchvals))] + + return(data.frame( + cbind(desc= c("nrCountries","nrVessels","minTime","maxTime","effort(hr)","totalKW","meanLength", + "gear1","gear2","gear3","catch1","catch2","catch3","value1","value2","value3"), + value=c(nrCountries,nrVessels,ac(range(eflalo$FT_DDATIM)[1]),ac(range(eflalo$FT_LDATIM)[2]), + round(totalEffort,1),round(totalKW,1),round(meanLength,1),gears, + paste(names(rev(sort(catches)))[1:3],round(rev(sort(catches))[1:3],0)), + paste(names(rev(sort(values )))[1:3],round(rev(sort(values ))[1:3],0)))), + stringsAsFactors=FALSE))} + + + + + + + + + diff --git a/vmstools/R/surface.r b/vmstools/R/surface.r index 2674815..69751b2 100644 --- a/vmstools/R/surface.r +++ b/vmstools/R/surface.r @@ -1,100 +1,148 @@ -surface <- function(obj,method="Trapezoid",includeNA=TRUE,zone=NULL){ #Methods can be "Trapezoid" and "UTM" - require(sp) - require(PBSmapping) - if(!class(obj) %in% c('SpatialGridDataFrame','SpatialPolygons')) - 
#' Calculate surface from grid cells or polygons
#'
#' Calculate the surface in km2 of the grid / polygons that has been used
#'
#' Method UTM might take longer due to way of calculation, but is more precise
#' than the Trapezoid function, especially when larger gridcells are used.
#'
#' @param obj defined SpatialGridDataFrame or SpatialPolygons object (see 'sp'
#'   package)
#' @param method Method to be used to calculate surface, either Trapezoid or
#'   UTM
#' @param includeNA Whether to include cells which do not hold any data
#' @param zone Include UTM zone notation (or detected automatically when NULL)
#' @return If obj is a SpatialGridDataFrame an additional column named
#'   'cellArea' is returned which holds the km2 area of the grid cell. If obj is
#'   a SpatialPolygons, each polygon area slot is filled with the area in km2.
#' @author Niels T. Hintzen
#' @seealso \code{\link{createGrid}}
#' @examples
#'
#' data(tacsat)
#'
#' #Sort the Tacsat data
#' tacsat <- sortTacsat(tacsat)
#' tacsat <- tacsat[1:1000,]
#'
#' #Get the ranges of the tacsat data
#' xrange <- range(tacsat$SI_LONG,na.rm=TRUE);
#' xrange <- c(min(xrange) - min(xrange)*0.05,
#'             max(xrange) + max(xrange)*0.05)
#' yrange <- range(tacsat$SI_LATI,na.rm=TRUE);
#' yrange <- c(min(yrange) - min(yrange)*0.05,
#'             max(yrange) + max(yrange)*0.05)
#' #Setup a grid
#' sPDF <- createGrid(xrange,yrange,resx=0.1,resy=0.05,type="SpatialGridDataFrame")
#'
#' #Setup a polygon
#' sP <- lonLat2SpatialPolygons(lst=list(data.frame(
#'         SI_LONG=c(4,4.5,4.7,4),
#'         SI_LATI=c(54,54,55.5,55.7))))
#'
#' #Calculate the cell surface
#' result <- surface(sPDF,method="Trapezoid",includeNA=TRUE)
#' print(head(result@data))
#' result <- surface(sP,zone=31)
#' print(result@polygons[[1]]@Polygons[[1]]@area)
#'
#' @export surface
surface <- function(obj,method="Trapezoid",includeNA=TRUE,zone=NULL){ #Methods can be "Trapezoid" and "UTM"
  require(sp)
  require(PBSmapping)
  if(!class(obj) %in% c('SpatialGridDataFrame','SpatialPolygons'))
    stop("class of obj should be SpatialGridDataFrame or SpatialPolygons")

  if(class(obj) == 'SpatialPolygons')
  {
    #- Flatten every sub-polygon into one PolySet-style table (PID/POS/X/Y)
    #  so PBSmapping::calcArea can compute all areas in one call
    allSourcePoly <- numeric()
    counter       <- 0
    for(iPol1 in 1:length(obj@polygons)){
      for(iPol2 in 1:length(obj@polygons[[iPol1]])){
        counter    <- counter + 1
        sourcePoly <- data.frame(cbind(1,1:nrow(obj@polygons[[iPol1]]@Polygons[[iPol2]]@coords),
                                 obj@polygons[[iPol1]]@Polygons[[iPol2]]@coords[,1],obj@polygons[[iPol1]]@Polygons[[iPol2]]@coords[,2]))
        rownames(sourcePoly)<-1:nrow(sourcePoly)
        colnames(sourcePoly)<-c("PID","POS","X","Y")
        sourcePoly$PID[] <-counter

        allSourcePoly <- rbind(allSourcePoly,sourcePoly)
      }
    }
    areas <- calcArea(as.PolySet(allSourcePoly, projection="LL",zone=zone))$area
    #- Pair each computed area with its polygon label point and ID so it can
    #  be matched back to the right sub-polygon below
    areas <- data.frame(
              cbind(areas,do.call(rbind,lapply(obj@polygons,function(x){return(x@labpt)})),
                    do.call(rbind,lapply(obj@polygons,function(x){return(x@ID)}))),stringsAsFactors=FALSE)
    colnames(areas) <- c("areas","labptx","labpty","ID")

    #- Rebuild the SpatialPolygons with each @area slot filled in km2
    obj <- SpatialPolygons(lapply(obj@polygons,function(x){
             res <- lapply( x@Polygons,function(y){
                      subAreas <- subset(areas,ID == x@ID & labptx == ac(y@labpt[1]) & labpty == ac(y@labpt[2]))
                      y@area <- anf(subAreas$areas);return(y)});
             return(Polygons(res,ID=x@ID))}))

  }
  if(!method %in% c("Trapezoid","UTM")) stop("method not available")
  if (class(obj) %in% c('SpatialGridDataFrame')) # not empty...
  {
    if(method == "Trapezoid"){
      #- Approximate each cell as a trapezoid: cell width (in km) changes with
      #  latitude, so sample 'res' latitudes per cell row and integrate
      res <- ceiling(max(obj@grid@cellsize,na.rm=TRUE)/0.1 * 10) #automatic scaling
      if(res < 3) res <- 3
      griddims <- summary(obj)$grid
      bboxdims <- bbox(obj)
      stlon    <- bboxdims[1,1]
      stlat    <- bboxdims[2,1]
      enlon    <- bboxdims[1,2]
      enlat    <- bboxdims[2,2]
      sizelon  <- griddims[1,2]
      sizelat  <- griddims[2,2]

      lons <- seq(stlon,enlon,sizelon)
      lats <- seq(stlat,enlat,sizelat)

      #- Height (km) of one latitude sub-step; base widths sampled per sub-step
      heights <- distance(lon=0,lat=stlat,lonRef=0,latRef=stlat+sizelat/(res-1))
      seqlats <- mapply(seq,lats[1:(length(lats)-1)],lats[2:length(lats)],length.out=res)

      base <- matrix(mapply(distance,lon=0,lat=c(seqlats),lonRef=sizelon,latRef=c(seqlats)),ncol=res,byrow=TRUE)
      if(dim(base)[1] == 1){
        base1   <- base[1:(res-1)]
        base2   <- base[2:res]
        surface <- rep(sum(heights * (base1 + base2)/2),each=length(seq(stlon,enlon-sizelon,sizelon)))
      } else {
        base1   <- base[,1:(res-1)]
        base2   <- base[,2:res]
        surface <- rep(apply(heights * (base1 + base2) / 2,1,sum),each=length(seq(stlon,enlon-sizelon,sizelon)))
      }

      #- Grid cells are stored top-to-bottom, surfaces were built bottom-to-top
      obj@data$cellArea <- rev(surface)
    }
    if(method == "UTM"){
      require(PBSmapping)
      griddims <- summary(obj)$grid
      sizelon  <- griddims[1,2]
      sizelat  <- griddims[2,2]

      ltCentreCell<-coordinates(obj)

      #- Build the 4-corner polygon of each cell and let calcArea do the work
      for (x in 1:(length(ltCentreCell)/2)){
        if (includeNA) { # speed up the calculation by dropping cells with fishing=NA /!| only work for DCF5 and DCF6!
          minX<-ltCentreCell[x,1]-sizelon/2
          maxX<-ltCentreCell[x,1]+sizelon/2
          minY<-ltCentreCell[x,2]-sizelat/2
          maxY<-ltCentreCell[x,2]+sizelat/2
          ltX<-c(minX,minX,maxX,maxX)
          ltY<-c(minY,maxY,maxY,minY)
          sourcePoly<-cbind(rep(1,4),seq(1,4),ltX,ltY)
          rownames(sourcePoly)<-seq(1,4)
          colnames(sourcePoly)<-c("PID","POS","X","Y")

          polyArea<-calcArea(as.PolySet(sourcePoly, projection="LL",zone=zone))
          singleCellArea<-polyArea$area
        } else {singleCellArea<-NA}
        obj@data$cellArea[x]<-singleCellArea
      }
    }
  }
  return(obj)}
+#' @param table_variables Transpose the dataset (change variables into +#' individuals) +#' @param scree Implementation of "scree-test" +#' @param select_species Remove the cluster with the smallest mean of capture +#' @param building_tab_pca Build the table with the main species +#' @param test.values Compute the test-value for each species by cluster +#' @param targetspecies Determine the species with a test-value > 1.96 by +#' cluster +#' @param withinVar Calculate the cluster's within-variance +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @references Development of tools for logbook and VMS data analysis. Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @export table_variables +table_variables=function(data){ + n=nrow(data) + res1=t(as.matrix(data[1:round(n/2),])) + res2=t(as.matrix(data[(round(n/2)+1):n,])) + res=cbind(res1,res2) + row.names(res)=colnames(data) + colnames(res)=row.names(data) + return(res) +} diff --git a/vmstools/R/tacsatMCP.r b/vmstools/R/tacsatMCP.r index 55f06c1..6ee19e7 100644 --- a/vmstools/R/tacsatMCP.r +++ b/vmstools/R/tacsatMCP.r @@ -1,19 +1,53 @@ -## tacsatMinimumConvexPolygon.r -## by Fabrizio Manco, 22/09/2010 -## Flag vms pings inside a convex polygon regrouping a threshold percentage of total points (default 90%, for indicator DCF6) - -tacsatMCP <- function (tacsat, pctThreshold=90) -{ - require(aspace) - - vmsPoints<-cbind(tacsat$SI_LONG, tacsat$SI_LATI) - - vmsMCP<-calc_mcp(id=1, points = vmsPoints, filename="", verbose = FALSE, pct = pctThreshold) - - MCPolygon<-as.data.frame(vmsMCP$MCP.coords) - pointInOut<-point.in.polygon(vmsPoints[,1], vmsPoints[,2], MCPolygon[,2], MCPolygon[,3]) - - tacsat$INMCP<-pointInOut - - return(tacsat) 
-} \ No newline at end of file +## tacsatMinimumConvexPolygon.r +## by Fabrizio Manco, 22/09/2010 +## Flag vms pings inside a convex polygon regrouping a threshold percentage of total points (default 90%, for indicator DCF6) + + + +#' Flag the tacsat ping inside a Minimum Convex Polygon +#' +#' This function will flag the tacsat pings that are located inside a minimum +#' convex polygon which contains a percentage of the whole number of pings +#' +#' This function is based on the calc_mcp function of the aspace package. The +#' plot_mcp function allows to plot the polygon. It is used in the DCF +#' Indicator 6 as a filtering of aggregated fisheries. +#' +#' @param tacsat tacsat data frame +#' @param pctThreshold Percentage of pings to be included into the MCP +#' @return The tacsat is returned with attached a column called INMCP which +#' flag the pings inside the MCP. The nodes of the MCP are also returned. +#' @author Fabrizio Manco +#' @seealso \code{\link{indicators}} +#' @references EU lot 2 project +#' @examples +#' +#' require(PBSmapping) +#' data(tacsat) +#' +#' # Flag the pings inside a polygon gathering 90% of the pings +#' tacsat<-tacsatMCP(tacsat, pctThreshold=90) +#' # Filter the tacsat data to remove the points outside the MCP +#' tacsat<-subset(tacsat, tacsat$INMCP!=0) +#' # Grid the filtered vms points +#' vmsgrid<-vmsGridCreate(tacsat, nameLon = "SI_LONG", nameLat = "SI_LATI", +#' cellsizeX=0.05, cellsizeY=0.05, plotMap=TRUE) +#' # Add the MCP to the map +#' plot_mcp(plotnew=FALSE, plotpoints=FALSE, titletxt="") +#' +#' @export tacsatMCP +tacsatMCP <- function (tacsat, pctThreshold=90) +{ + require(aspace) + + vmsPoints<-cbind(tacsat$SI_LONG, tacsat$SI_LATI) + + vmsMCP<-calc_mcp(id=1, points = vmsPoints, filename="", verbose = FALSE, pct = pctThreshold) + + MCPolygon<-as.data.frame(vmsMCP$MCP.coords) + pointInOut<-point.in.polygon(vmsPoints[,1], vmsPoints[,2], MCPolygon[,2], MCPolygon[,3]) + + tacsat$INMCP<-pointInOut + + return(tacsat) +} diff 
--git a/vmstools/R/tacsatMinimumConvexPolygon.r b/vmstools/R/tacsatMinimumConvexPolygon.r index 42f8d67..cd2a70c 100644 --- a/vmstools/R/tacsatMinimumConvexPolygon.r +++ b/vmstools/R/tacsatMinimumConvexPolygon.r @@ -1,21 +1,43 @@ -## tacsatMinimumConvexPolygon.r -## by Fabrizio Manco, 22/09/2010 -## Flag vms pings inside a convex polygon regrouping a threshold percentage of total points (default 90%, for indicator DCF6) - -tacsatMinimumConvexPolygon <- function (tacsat, - pctThreshold=90) -{ - require(aspace) - require(ade4) - - vmsPoints<-cbind(tacsat$SI_LONG, tacsat$SI_LATI) - - vmsMCP<-calc_mcp(id=1, points = vmsPoints, filename="", verbose = FALSE, pct = pctThreshold) - - MCPolygon<-as.data.frame(vmsMCP$MCP.coords) - pointInOut<-point.in.polygon(vmsPoints[,1], vmsPoints[,2], MCPolygon[,2], MCPolygon[,3]) - - tacsat$INMCP<-pointInOut - - return(tacsat) -} \ No newline at end of file +## tacsatMinimumConvexPolygon.r +## by Fabrizio Manco, 22/09/2010 +## Flag vms pings inside a convex polygon regrouping a threshold percentage of total points (default 90%, for indicator DCF6) + + + +#' Flag tacsat records that are within the convex polygon +#' +#' Flag tacsat records that are within the convex polygon with a predefined +#' threshold +#' +#' See point.in.polygon function for returned value details +#' +#' @param tacsat Tacsat dataframe +#' @param pctThreshold Threshold of points to consider. Between 0 and 100. 
+#' @author Fabrizio Manco +#' @references EU Lot 2 project +#' @examples +#' +#' require(adehabitat) +#' require(ade4) +#' data(tacsat) +#' tacsat <- tacsat[1:100,] +#' tacsatMinimumConvexPolygon(tacsat,95) +#' +#' @export tacsatMinimumConvexPolygon +tacsatMinimumConvexPolygon <- function (tacsat, + pctThreshold=90) +{ + require(aspace) + require(ade4) + + vmsPoints<-cbind(tacsat$SI_LONG, tacsat$SI_LATI) + + vmsMCP<-calc_mcp(id=1, points = vmsPoints, filename="", verbose = FALSE, pct = pctThreshold) + + MCPolygon<-as.data.frame(vmsMCP$MCP.coords) + pointInOut<-point.in.polygon(vmsPoints[,1], vmsPoints[,2], MCPolygon[,2], MCPolygon[,3]) + + tacsat$INMCP<-pointInOut + + return(tacsat) +} diff --git a/vmstools/R/targetspecies.r b/vmstools/R/targetspecies.r index 3f97858..c9c50ae 100644 --- a/vmstools/R/targetspecies.r +++ b/vmstools/R/targetspecies.r @@ -1,34 +1,62 @@ -############################################################# -# Determine the species with a test-value > 1.96 by cluster # -############################################################# - -targetspecies=function(resval){ - p=nrow(resval) - nbgp=ncol(resval) - - tabnumespcib=data.frame() - tabnomespcib=data.frame() - - for(i in 1:nbgp){ - # qnorm(0.975,mean=0,sd=1)=1.96 (P(resval>1.96)=0.025) - numespcib=which(resval[,i]>1.96) - numespcibdec=numespcib[order(resval[numespcib,i],decreasing=TRUE)] - nomespcib=names(numespcibdec) - - nbespgpcib=length(numespcib) - - if(nbespgpcib>0){ - for (j in 1:nbespgpcib){ - tabnumespcib[i,j]=numespcibdec[j] - tabnomespcib[i,j]=nomespcib[j] - } - }else{ - tabnumespcib[i,]=NA - tabnomespcib[i,]=NA - } - } - tabnumespcib=as.matrix(tabnumespcib) - tabnomespcib=as.matrix(tabnomespcib) - return(list(tabnumespcib=tabnumespcib,tabnomespcib=tabnomespcib)) -} - +############################################################# +# Determine the species with a test-value > 1.96 by cluster # +############################################################# + + + +#' Useful functions for 
the multivariate analysis of logbooks data for +#' identifying metiers. +#' +#' This function contains several functions needed for the multivariate +#' analysis of logbooks data for identifying metiers. +#' +#' +#' @param transformation_proportion Transform quantities to percentage values +#' (between 0 and 100) of each species in the logevent total catch. +#' @param table_variables Transpose the dataset (change variables into +#' individuals) +#' @param scree Implementation of "scree-test" +#' @param select_species Remove the cluster with the smallest mean of capture +#' @param building_tab_pca Build the table with the main species +#' @param test.values Compute the test-value for each species by cluster +#' @param targetspecies Determine the species with a test-value > 1.96 by +#' cluster +#' @param withinVar Calculate the cluster's within-variance +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @references Development of tools for logbook and VMS data analysis. 
Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @export targetspecies +targetspecies=function(resval){ + p=nrow(resval) + nbgp=ncol(resval) + + tabnumespcib=data.frame() + tabnomespcib=data.frame() + + for(i in 1:nbgp){ + # qnorm(0.975,mean=0,sd=1)=1.96 (P(resval>1.96)=0.025) + numespcib=which(resval[,i]>1.96) + numespcibdec=numespcib[order(resval[numespcib,i],decreasing=TRUE)] + nomespcib=names(numespcibdec) + + nbespgpcib=length(numespcib) + + if(nbespgpcib>0){ + for (j in 1:nbespgpcib){ + tabnumespcib[i,j]=numespcibdec[j] + tabnomespcib[i,j]=nomespcib[j] + } + }else{ + tabnumespcib[i,]=NA + tabnomespcib[i,]=NA + } + } + tabnumespcib=as.matrix(tabnumespcib) + tabnomespcib=as.matrix(tabnomespcib) + return(list(tabnumespcib=tabnumespcib,tabnomespcib=tabnomespcib)) +} + diff --git a/vmstools/R/tclWindows.r b/vmstools/R/tclWindows.r index 6a31695..c6b00bd 100644 --- a/vmstools/R/tclWindows.r +++ b/vmstools/R/tclWindows.r @@ -1,65 +1,75 @@ -callNumberPeak <- function(){ -tt <- tktoplevel() -peaks <- tclVar(5) - -f1 <- tkframe(tt) -tkpack(f1, side='top') -tkpack(tklabel(f1, text='peaks: '), side='left') -tkpack(tkentry(f1, textvariable=peaks), side='left') - -tkpack(tkbutton(tt, text='Next', command=function() tkdestroy(tt)), - side='right', anchor='s') - -tkwait.window(tt) -return(as.numeric(tclvalue(peaks)))} - - - - -callPeakValue <- function(pks){ - -#-Put default values for peaks -for(iPeaks in 1:pks){ - if(iPeaks == 1) entry1 <- tclVar("-10") - if(iPeaks == 2) entry2 <- tclVar("-5") - if(iPeaks == 3) entry3 <- tclVar("0") - if(iPeaks == 4) entry4 <- tclVar("5") - if(iPeaks == 5) entry5 <- tclVar("10") -} - -#-Create input window -tt <- tktoplevel() -tkwm.title(tt,"Value of peaks") -for(iPeaks in 1:pks){ - if(iPeaks==1) box1 <- tkentry(tt, textvariable=entry1) - if(iPeaks==2) box2 <- tkentry(tt, textvariable=entry2) - if(iPeaks==3) box3 <- tkentry(tt, textvariable=entry3) - if(iPeaks==4) box4 <- tkentry(tt, textvariable=entry4) - 
if(iPeaks==5) box5 <- tkentry(tt, textvariable=entry5) -} -#-Create input rows -tkgrid(tklabel(tt,text="value of peaks"),columnspan=pks) -for(iPeaks in 1:pks){ - if(iPeaks==1) tkgrid(tklabel(tt,text=paste("peak",iPeaks)), box1) - if(iPeaks==2) tkgrid(tklabel(tt,text=paste("peak",iPeaks)), box2) - if(iPeaks==3) tkgrid(tklabel(tt,text=paste("peak",iPeaks)), box3) - if(iPeaks==4) tkgrid(tklabel(tt,text=paste("peak",iPeaks)), box4) - if(iPeaks==5) tkgrid(tklabel(tt,text=paste("peak",iPeaks)), box5) -} - -done <- tclVar(0) -eqvar <- tclVar(0) - -#-Create submit button -submit.but <- tkbutton(tt, text="submit",command=function()tclvalue(done)<-1) - -tkgrid(submit.but) -tkbind(tt, "", function()tclvalue(done)<-2) -tkwait.variable(done) - -if(tclvalue(done)=="2") stop("aborted") -tkdestroy(tt) -valPeaks <- numeric() -for(iPks in 1:pks) valPeaks <- paste(valPeaks,tclvalue(get(paste("entry",iPks,sep="")))) -return(valPeaks)} - +callNumberPeak <- function(){ +tt <- tktoplevel() +peaks <- tclVar(5) + +f1 <- tkframe(tt) +tkpack(f1, side='top') +tkpack(tklabel(f1, text='peaks: '), side='left') +tkpack(tkentry(f1, textvariable=peaks), side='left') + +tkpack(tkbutton(tt, text='Next', command=function() tkdestroy(tt)), + side='right', anchor='s') + +tkwait.window(tt) +return(as.numeric(tclvalue(peaks)))} + + + + + + +#' Internal function to retrieve information on number of peaks +#' +#' Internal function used together with analyseTacsatActivity. +#' +#' +#' @param pks Number of peaks spotted +#' @author Niels T.
Hintzen +#' @export callPeakValue +callPeakValue <- function(pks){ + +#-Put default values for peaks +for(iPeaks in 1:pks){ + if(iPeaks == 1) entry1 <- tclVar("-10") + if(iPeaks == 2) entry2 <- tclVar("-5") + if(iPeaks == 3) entry3 <- tclVar("0") + if(iPeaks == 4) entry4 <- tclVar("5") + if(iPeaks == 5) entry5 <- tclVar("10") +} + +#-Create input window +tt <- tktoplevel() +tkwm.title(tt,"Value of peaks") +for(iPeaks in 1:pks){ + if(iPeaks==1) box1 <- tkentry(tt, textvariable=entry1) + if(iPeaks==2) box2 <- tkentry(tt, textvariable=entry2) + if(iPeaks==3) box3 <- tkentry(tt, textvariable=entry3) + if(iPeaks==4) box4 <- tkentry(tt, textvariable=entry4) + if(iPeaks==5) box5 <- tkentry(tt, textvariable=entry5) +} +#-Create input rows +tkgrid(tklabel(tt,text="value of peaks"),columnspan=pks) +for(iPeaks in 1:pks){ + if(iPeaks==1) tkgrid(tklabel(tt,text=paste("peak",iPeaks)), box1) + if(iPeaks==2) tkgrid(tklabel(tt,text=paste("peak",iPeaks)), box2) + if(iPeaks==3) tkgrid(tklabel(tt,text=paste("peak",iPeaks)), box3) + if(iPeaks==4) tkgrid(tklabel(tt,text=paste("peak",iPeaks)), box4) + if(iPeaks==5) tkgrid(tklabel(tt,text=paste("peak",iPeaks)), box5) +} + +done <- tclVar(0) +eqvar <- tclVar(0) + +#-Create submit button +submit.but <- tkbutton(tt, text="submit",command=function()tclvalue(done)<-1) + +tkgrid(submit.but) +tkbind(tt, "", function()tclvalue(done)<-2) +tkwait.variable(done) + +if(tclvalue(done)=="2") stop("aborted") +tkdestroy(tt) +valPeaks <- numeric() +for(iPks in 1:pks) valPeaks <- paste(valPeaks,tclvalue(get(paste("entry",iPks,sep="")))) +return(valPeaks)} + diff --git a/vmstools/R/test.values.r b/vmstools/R/test.values.r index 5b2aed3..1a3f073 100644 --- a/vmstools/R/test.values.r +++ b/vmstools/R/test.values.r @@ -1,48 +1,76 @@ -###################################################### -# Compute the test-value for each species by cluster # -###################################################### - -test.values=function(groupes,data){ - - n=nrow(data) - 
p=ncol(data) - noms_var=colnames(data) - nb_groupes=length(levels(as.factor(groupes))) - noms_groupes=character(nb_groupes) - - stats_globales=matrix(0,nrow=p,ncol=2) - row.names(stats_globales)=noms_var - colnames(stats_globales)=c("mean","variance") - for (i in 1:p){ - stats_globales[i,1]=mean(data[,noms_var[i]]) - stats_globales[i,2]=var(data[,noms_var[i]]) - } - - res=matrix(0,nrow=p,ncol=nb_groupes) - row.names(res)=noms_var - - for (j in 1:nb_groupes){ - groupe=which(groupes==j) - n_k=length(groupe) - - for (i in 1:p){ - mu_k=mean(data[groupe,noms_var[i]]) - mu=stats_globales[noms_var[i],"mean"] - V=stats_globales[noms_var[i],"variance"] - V_mu_k=(n-n_k)*V/(n_k*(n-1)) - - if(V_mu_k==0){ - Valeur_test=0 - }else{ - Valeur_test=(mu_k-mu)/sqrt(V_mu_k) - } - - res[i,j]=Valeur_test - rm(Valeur_test) - } - rm(groupe) - noms_groupes[j]=paste("Cluster",j,sep=" ") - } - colnames(res)=noms_groupes - return(res) -} +###################################################### +# Compute the test-value for each species by cluster # +###################################################### + + + +#' Useful functions for the multivariate analysis of logbooks data for +#' identifying metiers. +#' +#' This function contains several functions needed for the multivariate +#' analysis of logbooks data for identifying metiers. +#' +#' +#' @param transformation_proportion Transform quantities to percentage values +#' (between 0 and 100) of each species in the logevent total catch. 
+#' @param table_variables Transpose the dataset (change variables into +#' individuals) +#' @param scree Implementation of "scree-test" +#' @param select_species Remove the cluster with the smallest mean of capture +#' @param building_tab_pca Build the table with the main species +#' @param test.values Compute the test-value for each species by cluster +#' @param targetspecies Determine the species with a test-value > 1.96 by +#' cluster +#' @param withinVar Calculate the cluster's within-variance +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @references Development of tools for logbook and VMS data analysis. Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @export test.values +test.values=function(groupes,data){ + + n=nrow(data) + p=ncol(data) + noms_var=colnames(data) + nb_groupes=length(levels(as.factor(groupes))) + noms_groupes=character(nb_groupes) + + stats_globales=matrix(0,nrow=p,ncol=2) + row.names(stats_globales)=noms_var + colnames(stats_globales)=c("mean","variance") + for (i in 1:p){ + stats_globales[i,1]=mean(data[,noms_var[i]]) + stats_globales[i,2]=var(data[,noms_var[i]]) + } + + res=matrix(0,nrow=p,ncol=nb_groupes) + row.names(res)=noms_var + + for (j in 1:nb_groupes){ + groupe=which(groupes==j) + n_k=length(groupe) + + for (i in 1:p){ + mu_k=mean(data[groupe,noms_var[i]]) + mu=stats_globales[noms_var[i],"mean"] + V=stats_globales[noms_var[i],"variance"] + V_mu_k=(n-n_k)*V/(n_k*(n-1)) + + if(V_mu_k==0){ + Valeur_test=0 + }else{ + Valeur_test=(mu_k-mu)/sqrt(V_mu_k) + } + + res[i,j]=Valeur_test + rm(Valeur_test) + } + rm(groupe) + noms_groupes[j]=paste("Cluster",j,sep=" ") + } + colnames(res)=noms_groupes + return(res) +} diff --git 
a/vmstools/R/transformation_proportion.r b/vmstools/R/transformation_proportion.r index bdf366f..6502af1 100644 --- a/vmstools/R/transformation_proportion.r +++ b/vmstools/R/transformation_proportion.r @@ -1,19 +1,47 @@ -################################################################# -# Transform quantities to percentage values (between 0 and 100) # -# of each species in the logevent total catch # -################################################################# - -transformation_proportion=function(tab){ - res=as.matrix(tab) - n=nrow(tab) - p=ncol(tab) - for (i in 1:n){ - sommeligne=sum(res[i,], na.rm=TRUE) - if(sommeligne==0){ - res[i,]=rep(0,p) - }else{ - res[i,]=res[i,]*(100/sommeligne) - } - } - return(res) -} \ No newline at end of file +################################################################# +# Transform quantities to percentage values (between 0 and 100) # +# of each species in the logevent total catch # +################################################################# + + + +#' Useful functions for the multivariate analysis of logbooks data for +#' identifying metiers. +#' +#' This function contains several functions needed for the multivariate +#' analysis of logbooks data for identifying metiers. +#' +#' +#' @param transformation_proportion Transform quantities to percentage values +#' (between 0 and 100) of each species in the logevent total catch. 
+#' @param table_variables Transpose the dataset (change variables into +#' individuals) +#' @param scree Implementation of "scree-test" +#' @param select_species Remove the cluster with the smallest mean of capture +#' @param building_tab_pca Build the table with the main species +#' @param test.values Compute the test-value for each species by cluster +#' @param targetspecies Determine the species with a test-value > 1.96 by +#' cluster +#' @param withinVar Calculate the cluster's within-variance +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @references Development of tools for logbook and VMS data analysis. Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @export transformation_proportion +transformation_proportion=function(tab){ + res=as.matrix(tab) + n=nrow(tab) + p=ncol(tab) + for (i in 1:n){ + sommeligne=sum(res[i,], na.rm=TRUE) + if(sommeligne==0){ + res[i,]=rep(0,p) + }else{ + res[i,]=res[i,]*(100/sommeligne) + } + } + return(res) +} diff --git a/vmstools/R/vmsGridCreate.r b/vmstools/R/vmsGridCreate.r index 8f5c5ad..162911e 100644 --- a/vmstools/R/vmsGridCreate.r +++ b/vmstools/R/vmsGridCreate.r @@ -1,141 +1,214 @@ -#vmsGridCreate.r -#andy south 10/2/09 - -#flexible function to create fishing activity grids from VMS -#change to check upload Paris june 10 - -vmsGridCreate <- function( dF - , nameLon = "Longitude" - , nameLat = "Latitude" - , nameVarToSum = "" - , cellsizeX = 0.5 - , cellsizeY = 0.5 - , we="" - , ea="" - , so="" - , no="" - , gridValName="fishing" - , plotMap = TRUE - , plotTitle = "" - , numCats = 5 - , paletteCats = "heat.colors" - , addLegend = TRUE - , legendx='bottomleft' - , legendncol = 1 - , legendtitle = "fishing activity" - , plotPoints 
= TRUE - , legPoints = FALSE - , colPoints = 1 - , colLand = 'sienna' - , addICESgrid = FALSE - , addScale = TRUE - , outGridFile = "" #name for output gridAscii - , outPlot = "" #name for output png - , ... ) -{ - -require(sp) -require(maptools) - -lstargs <- list(...) - -#only create grids when num records >0 (otherwise generates error) -if ( nrow(dF) > 0 ) - { - - #if bounds are not specified then set them from the data - #rounds bounds to nearest whole cell unit - if ( we == "" ) {we = min( dF[[nameLon]], na.rm=TRUE ); we = we - we%%cellsizeX} - if ( ea == "" ) {ea = max( dF[[nameLon]], na.rm=TRUE ); ea = ea - ea%%cellsizeX + cellsizeX} - if ( so == "" ) {so = min( dF[[nameLat]], na.rm=TRUE ); so = so - so%%cellsizeY} - if ( no == "" ) {no = max( dF[[nameLat]], na.rm=TRUE ); no = no - no%%cellsizeY + cellsizeY} - - #if ( ea == "" ) ea = max( dF[[nameLon]], na.rm=T ) - #if ( so == "" ) so = min( dF[[nameLat]], na.rm=T ) - #if ( no == "" ) no = max( dF[[nameLat]], na.rm=T ) - - #this copes with negative we or so values - numXcells <- ceiling((ea-we)/cellsizeX) - numYcells <- ceiling((no-so)/cellsizeY) - #this copes with negative ea or no values - numXcells <- abs(numXcells) - numYcells <- abs(numYcells) - - #setting grid topology using package 'sp' - #gridTopology <- GridTopology(c(we,so), c(cellsize,cellsize), c(numXcells,numYcells)) - #this sets grid at lower left corner rather than centre - gridTopology <- GridTopology(c(we+(cellsizeX/2),so+(cellsizeY/2)), c(cellsizeX,cellsizeY), c(numXcells,numYcells)) - spatialGrid <- SpatialGrid(grid=gridTopology) - gridded(spatialGrid) = TRUE - - #put points into a 'SpatialPointsDataFrame' 'sp' object - coords <- cbind(x=dF[[nameLon]],y=dF[[nameLat]]) - sPDF <- SpatialPointsDataFrame(coords,data=dF) - - #overlay to find which grid cell that each VMS point is in - #can take a long time, but does work eventually - gridCellIndexPerVMSpoint <- over( as(sPDF,"SpatialPoints"),spatialGrid ) - sPDF$gridCellIndex <- 
gridCellIndexPerVMSpoint - - #if there's a column of time intervals then sum them - if (nameVarToSum != "") - { - #sum timeInterval for each gridCellIndex - perCell <- tapply(sPDF[[nameVarToSum]], gridCellIndexPerVMSpoint, sum) - #print('1') - } - else - { - #if no time interval, just count pings - #probably a better way than doing this! - sPDF$ones <- 1 #this just creates a vector of all 1s - perCell <- tapply(sPDF$ones, gridCellIndexPerVMSpoint, sum) - #print('2') - } - - #then need to get those aggregated data values back onto the original grid - #get it to a spatialGridDataFrame to be able to do that - - #create blank dataframe for data based on grid dimensions I know : numXcells * numYcells - dFdataEmpty <- data.frame( seqID=(1:(numXcells*numYcells))) - spatialGridDataFrame <- SpatialGridDataFrame( grid=gridTopology, data=dFdataEmpty ) - - spatialGridDataFrame[[gridValName]] <- NA #setting up blank column - #assigns summed values per cell back to the grid - #!bit tricky - spatialGridDataFrame[[gridValName]][as.integer(names(perCell))] <- c(perCell) - - if (plotMap) - { - - mapGrid(spatialGridDataFrame, sPDF - ,we=we, ea=ea, so=so, no=no - ,gridValName=gridValName, plotTitle = plotTitle - ,numCats = numCats, paletteCats =paletteCats, addLegend = addLegend - ,legendx=legendx, legendncol = legendncol - ,legendtitle = legendtitle, plotPoints = plotPoints, legPoints = legPoints, colPoints=colPoints - , colLand=colLand, addICESgrid = addICESgrid, addScale = addScale - ,outGridFile = outGridFile, outPlot = outPlot, breaks0=lstargs$breaks0 ) - } #end of plotMap - - #to output spatialGridDataFrame to a gridascii file - if (outGridFile != "") - { - writeAsciiGrid(spatialGridDataFrame, outGridFile, na.value = -99.99, attr=gridValName, dec='.') - } - - #option to save the plot - if (outPlot != "") - { - savePlot(outPlot,type='png') - } - - #returning invisibly - invisible(spatialGridDataFrame) - - } #end of if (nrow(dF)>0) - -} #end of vmsGridCreate - - - 
+#vmsGridCreate.r +#andy south 10/2/09 + +#flexible function to create fishing activity grids from VMS +#change to check upload Paris june 10 + + + +#' function to create grids from point data by counting points in cells or +#' summing an attribute +#' +#' Accepts an input of points data in a data frame with named columns for X and +#' Y. Creates a grid of defined cell size in X and Y directions (cells can be +#' unequal). Either counts points in cells or sums an attribute variable if one +#' is supplied. Optionally plots a map of the grid and outputs to a gridAscii +#' file and/or an image. +#' +#' +#' @param dF a dataFrame containing point data +#' @param nameLon name of the column in the dataFrame containing Longitude or x +#' values +#' @param nameLat name of the column in the dataFrame containing Latitude or y +#' values +#' @param nameVarToSum optional name of the column in the dataFrame containing +#' the attribute values to sum in the grid. If set to "" points are counted +#' @param cellsizeX length X (horizontal) of desired grid cells, in same units +#' as the coordinates +#' @param cellsizeY length Y (vertical) of desired grid cells, in same units as +#' the coordinates +#' @param we western bounds of the desired grid +#' @param ea eastern bounds of the desired grid +#' @param so southern bounds of the desired grid +#' @param no northern bounds of the desired grid +#' @param gridValName the name to give to the attribute column of the returned +#' \code{SpatialGridDataFrame}, set to 'fishing' by default +#' @param plotMap whether to plot a map of the resulting grid +#' @param plotTitle optional title to add to the plot +#' @param numCats how many categories to classify grid values into for map plot +#' (uses \code{pretty()} classification) +#' @param paletteCats color palette to use +#' @param addLegend whether to add a legend to the plot +#' @param legendx position of legend should be one of 'bottomright', 'bottom', +#' 'bottomleft', 'left', 'topleft',
'top', 'topright', 'right', 'center' +#' @param legendncol number of columns in the legend +#' @param legendtitle legend title +#' @param plotPoints whether to add the original points to the plot +#' @param legPoints Logical. Points in legend +#' @param colPoints color of points to plot +#' @param colLand color of land +#' @param addICESgrid Logical. Adding ICES grid on top +#' @param addScale Logical. Adding axes +#' @param outGridFile optional name for a gridAscii file to be created from the +#' grid +#' @param outPlot optional name for a png file to be created from the plot +#' @param \dots NOT used yet +#' @return a \code{SpatialGridDataFrame} object of the grid defined in package +#' \code{sp} +#' @author Andy South +#' @seealso \code{\link{mapGrid}} +#' @references EU VMS tools project +#' @examples +#' +#' #vmsGridCreate(dF, nameLon = "POS_LONGITUDE", nameLat = "POS_LATITUDE", +#' # cellsizeX = 0.5, cellsizeY = 0.5,legendx='bottomright',plotPoints=TRUE ) +#' #get the example data +#' data(tacsat) +#' +#' #subset the first 2000 points to avoid problems with NAs +#' dFVMS <- tacsat[1:2000,] +#' +#' #create vms grid minimum call with defaults +#' vmsGridCreate(dFVMS,nameLat='SI_LATI',nameLon='SI_LONG') +#' +#' #making the grid finer +#' vmsGridCreate(dFVMS,nameLat='SI_LATI',nameLon='SI_LONG', +#' cellsizeX=0.05,cellsizeY=0.05) +#' +#' +#' @export vmsGridCreate +vmsGridCreate <- function( dF + , nameLon = "Longitude" + , nameLat = "Latitude" + , nameVarToSum = "" + , cellsizeX = 0.5 + , cellsizeY = 0.5 + , we="" + , ea="" + , so="" + , no="" + , gridValName="fishing" + , plotMap = TRUE + , plotTitle = "" + , numCats = 5 + , paletteCats = "heat.colors" + , addLegend = TRUE + , legendx='bottomleft' + , legendncol = 1 + , legendtitle = "fishing activity" + , plotPoints = TRUE + , legPoints = FALSE + , colPoints = 1 + , colLand = 'sienna' + , addICESgrid = FALSE + , addScale = TRUE + , outGridFile = "" #name for output gridAscii + , outPlot = "" #name for output png
+ , ... ) +{ + +require(sp) +require(maptools) + +lstargs <- list(...) + +#only create grids when num records >0 (otherwise generates error) +if ( nrow(dF) > 0 ) + { + + #if bounds are not specified then set them from the data + #rounds bounds to nearest whole cell unit + if ( we == "" ) {we = min( dF[[nameLon]], na.rm=TRUE ); we = we - we%%cellsizeX} + if ( ea == "" ) {ea = max( dF[[nameLon]], na.rm=TRUE ); ea = ea - ea%%cellsizeX + cellsizeX} + if ( so == "" ) {so = min( dF[[nameLat]], na.rm=TRUE ); so = so - so%%cellsizeY} + if ( no == "" ) {no = max( dF[[nameLat]], na.rm=TRUE ); no = no - no%%cellsizeY + cellsizeY} + + #if ( ea == "" ) ea = max( dF[[nameLon]], na.rm=T ) + #if ( so == "" ) so = min( dF[[nameLat]], na.rm=T ) + #if ( no == "" ) no = max( dF[[nameLat]], na.rm=T ) + + #this copes with negative we or so values + numXcells <- ceiling((ea-we)/cellsizeX) + numYcells <- ceiling((no-so)/cellsizeY) + #this copes with negative ea or no values + numXcells <- abs(numXcells) + numYcells <- abs(numYcells) + + #setting grid topology using package 'sp' + #gridTopology <- GridTopology(c(we,so), c(cellsize,cellsize), c(numXcells,numYcells)) + #this sets grid at lower left corner rather than centre + gridTopology <- GridTopology(c(we+(cellsizeX/2),so+(cellsizeY/2)), c(cellsizeX,cellsizeY), c(numXcells,numYcells)) + spatialGrid <- SpatialGrid(grid=gridTopology) + gridded(spatialGrid) = TRUE + + #put points into a 'SpatialPointsDataFrame' 'sp' object + coords <- cbind(x=dF[[nameLon]],y=dF[[nameLat]]) + sPDF <- SpatialPointsDataFrame(coords,data=dF) + + #overlay to find which grid cell that each VMS point is in + #can take a long time, but does work eventually + gridCellIndexPerVMSpoint <- over( as(sPDF,"SpatialPoints"),spatialGrid ) + sPDF$gridCellIndex <- gridCellIndexPerVMSpoint + + #if there's a column of time intervals then sum them + if (nameVarToSum != "") + { + #sum timeInterval for each gridCellIndex + perCell <- tapply(sPDF[[nameVarToSum]], 
gridCellIndexPerVMSpoint, sum) + #print('1') + } + else + { + #if no time interval, just count pings + #probably a better way than doing this! + sPDF$ones <- 1 #this just creates a vector of all 1s + perCell <- tapply(sPDF$ones, gridCellIndexPerVMSpoint, sum) + #print('2') + } + + #then need to get those aggregated data values back onto the original grid + #get it to a spatialGridDataFrame to be able to do that + + #create blank dataframe for data based on grid dimensions I know : numXcells * numYcells + dFdataEmpty <- data.frame( seqID=(1:(numXcells*numYcells))) + spatialGridDataFrame <- SpatialGridDataFrame( grid=gridTopology, data=dFdataEmpty ) + + spatialGridDataFrame[[gridValName]] <- NA #setting up blank column + #assigns summed values per cell back to the grid + #!bit tricky + spatialGridDataFrame[[gridValName]][as.integer(names(perCell))] <- c(perCell) + + if (plotMap) + { + + mapGrid(spatialGridDataFrame, sPDF + ,we=we, ea=ea, so=so, no=no + ,gridValName=gridValName, plotTitle = plotTitle + ,numCats = numCats, paletteCats =paletteCats, addLegend = addLegend + ,legendx=legendx, legendncol = legendncol + ,legendtitle = legendtitle, plotPoints = plotPoints, legPoints = legPoints, colPoints=colPoints + , colLand=colLand, addICESgrid = addICESgrid, addScale = addScale + ,outGridFile = outGridFile, outPlot = outPlot, breaks0=lstargs$breaks0 ) + } #end of plotMap + + #to output spatialGridDataFrame to a gridascii file + if (outGridFile != "") + { + writeAsciiGrid(spatialGridDataFrame, outGridFile, na.value = -99.99, attr=gridValName, dec='.') + } + + #option to save the plot + if (outPlot != "") + { + savePlot(outPlot,type='png') + } + + #returning invisibly + invisible(spatialGridDataFrame) + + } #end of if (nrow(dF)>0) + +} #end of vmsGridCreate + + + diff --git a/vmstools/R/vmstools-package.R b/vmstools/R/vmstools-package.R new file mode 100644 index 0000000..458bb2a --- /dev/null +++ b/vmstools/R/vmstools-package.R @@ -0,0 +1,291 @@ + + +#' Vessel logbook 
dataset +#' +#' A dataset consisting of logbook (landings / values) registrations of +#' disguised origin for 2 consecutive years. +#' +#' This example dataset is for example purposes only. Without prior permission +#' of the authors it is not allowed to use this data other than for example +#' non-publishable purposes. +#' +#' @name eflalo +#' @docType data +#' @format A data frame with 4539 observations on the following 189 variables. +#' \describe{ \item{list("VE_REF")}{Vessel reference / ID} +#' \item{list("VE_FLT")}{Fleet reference (DCF regulation)} +#' \item{list("VE_COU")}{Flag nation of vessel} \item{list("VE_LEN")}{Vessel +#' length} \item{list("VE_KW")}{Vessel power (in kW)} +#' \item{list("VE_TON")}{Vessel Tonnage (GT)} \item{list("FT_REF")}{Fishing +#' trip reference number} \item{list("FT_DCOU")}{Departure country} +#' \item{list("FT_DHAR")}{Departure harbour} \item{list("FT_DDAT")}{Departure +#' date} \item{list("FT_DTIME")}{Departure time} \item{list("FT_LCOU")}{Landing +#' country} \item{list("FT_LHAR")}{Landing harbour} +#' \item{list("FT_LDAT")}{Landing date} \item{list("FT_LTIME")}{Landing time} +#' \item{list("LE_ID")}{Log event ID} \item{list("LE_CDAT")}{Catch date} +#' \item{list("LE_STIME")}{Log event start time} \item{list("LE_ETIME")}{Log +#' event end time} \item{list("LE_SLAT")}{Log event start position latitude} +#' \item{list("LE_SLON")}{Log event start position longitude} +#' \item{list("LE_ELAT")}{Log event end position latitude} +#' \item{list("LE_ELON")}{Log event end position longitude} +#' \item{list("LE_GEAR")}{Gear} \item{list("LE_MSZ")}{Mesh size} +#' \item{list("LE_RECT")}{ICES rectangle} \item{list("LE_DIV")}{ICES division} +#' \item{list("LE_MET_level6")}{Fishing activity} +#' \item{list("LE_KG_SPEC")}{Landing weight estimate of species (FAO species +#' codes)} \item{list("LE_EURO_SPEC")}{Landing value of species (FAO species +#' codes)} } +#' @references EU Lot 2 project, Niels T. 
Hintzen (niels.hintzen@wur.nl) +#' @source NA +#' @keywords datasets +#' @examples +#' +#' data(eflalo) +#' eflalo <- formatEflalo(eflalo) +#' str(eflalo) +#' +NULL + + + + + +#' Dataframe containing > 3500 harbours primarily Europe +#' +#' A list of harbour positions primarily located in Europe, including -name, +#' -latitude, -longitude and a certain -range (in km) the harbour spans +#' (radius). +#' +#' +#' @name euharbours +#' @aliases euharbours harbours +#' @docType data +#' @references EU Lot 2 project, Niels T. Hintzen (niels.hintzen@wur.nl) +#' @keywords datasets +#' @examples +#' +#' data(tacsat) +#' data(euharbours); euharbours <- harbours +#' +#' #modify the range of the harbours to e.g. 2km +#' euharbours$range <- 2 +#' +#' pointInHarbour(tacsat$SI_LONG,tacsat$SI_LATI,euharbours) +#' +NULL + + + + + +#' Polygon dataset of Europe +#' +#' Polygon dataset with the coastlines of Europe. +#' +#' +#' @name europa +#' @docType data +#' @format A data frame with 83878 observations on the following 5 variables. +#' \describe{ \item{list("PID")}{a numeric vector} \item{list("SID")}{a numeric +#' vector} \item{list("POS")}{a numeric vector} \item{list("X")}{Longitude +#' values} \item{list("Y")}{Latitude values} } +#' @source NOAA coastline datafile +#' @keywords datasets +#' @examples +#' +#' data(europa) +#' require(PBSmapping) +#' +#' eurPols <- lonLat2SpatialPolygons(lst=lapply(as.list(sort(unique(europa$SID))), +#' function(x){data.frame( +#' SI_LONG=subset(europa,SID==x)$X, +#' SI_LATI=subset(europa,SID==x)$Y)})) +#' +#' plot(eurPols,col="green",xlim=c(-4,10),ylim=c(48,62)) +#' +NULL + + + + + +#' SpatialPolygons dataset of ICESareas +#' +#' A pre-defined SpatialPolygons dataset with the ICES areas. 
+#' +#' +#' @name ICESareas +#' @docType data +#' @source www.ices.dk +#' @keywords datasets +#' @examples +#' +#' require(sp) +#' data(ICESareas) +#' plot(ICESareas) +#' +NULL + + + + + +#' Merge TACSAT formatted data to EFLALO2 formatted data +#' +#' Method to assign the EFLALO2 tripnumber to the accompanying TACSAT records, +#' which were registered between the departure date and arrival date of a trip. +#' +#' Method only assigns a tripnumber to the tacsat data if there are vms pings +#' that occur between the departure date and arrival date of a specific vessel. +#' If no related trip can be found, tripnumber assigned to tacsat equals NA. +#' +#' @name mergeEflalo2Tacsat +#' @docType data +#' @param eflalo2 Trip and landings data in the Eflalo2 format +#' @param tacsat Vms data in the Tacsat format +#' @author Niels T. Hintzen, Francois Bastardie +#' @seealso \code{\link{findEndTacsat}}, \code{\link{sortTacsat}}, +#' \code{\link{filterTacsat}} +#' @references EU lot 2 project +#' @examples +#' +#' #Load the eflalo2 and tacsat data +#' data(eflalo) +#' data(tacsat) +#' +#' #-Remove duplicated records from tacsat +#' myf <- paste(tacsat$VE_REF,tacsat$SI_LATI,tacsat$SI_LONG, +#' tacsat$SI_DATE,tacsat$SI_TIME); +#' tacsat <- tacsat[!duplicated(myf),]; +#' +#' #-Merge the tacsat and eflalo2 data +#' #computation time approx 30 seconds +#' result <- mergeEflalo2Tacsat(eflalo,tacsat) +#' +NULL + + + + + +#' Useful functions for the multivariate analysis of logbooks data for +#' identifying metiers. +#' +#' This function contains several functions needed for the multivariate +#' analysis of logbooks data for identifying metiers. +#' +#' +#' @param transformation_proportion Transform quantities to percentage values +#' (between 0 and 100) of each species in the logevent total catch. 
+#' @param table_variables Transpose the dataset (change variables into +#' individuals) +#' @param scree Implementation of "scree-test" +#' @param select_species Remove the cluster with the smallest mean of capture +#' @param building_tab_pca Build the table with the main species +#' @param test.values Compute the test-value for each species by cluster +#' @param targetspecies Determine the species with a test-value > 1.96 by +#' cluster +#' @param withinVar Calculate the cluster's within-variance +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @references Development of tools for logbook and VMS data analysis. Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +NULL + + + + + +#' Vessel Monitoring by Satellite system dataset +#' +#' A dataset consisting of VMS registrations of disguised origin for 2 +#' consecutive years. +#' +#' This example dataset is for example purposes only. Without prior permission +#' of the authors it is not allowed to use this data other than for example +#' non-publishable purposes. +#' +#' @name tacsat +#' @docType data +#' @format A data frame with 97015 observations on the following 8 variables. +#' \describe{ \item{list("VE_COU")}{Flag nation of vessel} +#' \item{list("VE_REF")}{Vessel reference / ID} +#' \item{list("SI_LATI")}{Latitudinal position (in decimal degrees)} +#' \item{list("SI_LONG")}{Longitudinal position (in decimal degrees)} +#' \item{list("SI_DATE")}{Date} \item{list("SI_TIME")}{Time (24 hours clock)} +#' \item{list("SI_SP")}{Speed (in knots)} \item{list("SI_HE")}{Heading +#' (in degrees)} } +#' @references EU Lot 2 project, Niels T. 
Hintzen (niels.hintzen@wur.nl) +#' @source NA +#' @keywords datasets +#' @examples +#' +#' data(tacsat) +#' tacsat <- formatTacsat(tacsat) +#' str(tacsat) +#' +NULL + + + + + +#' Small high frequency test VMS dataset CANNOT BE DISTRIBUTED WITHOUT PRIOR +#' PERMISSION -> SEE Author +#' +#' A short VMS dataset with high frequency interval rate (every 6 minutes) used +#' to test spatial and VMS functions. Dataframe contains vessel ID, decimal +#' longitude and decimal latitude position, speed and heading at ping and date +#' of ping. +#' +#' +#' @name VMShf +#' @docType data +#' @format A data frame with 702 observations on the following 6 variables. +#' \describe{ \item{list("ship")}{ID of ship} \item{list("declat")}{Decimal +#' notation of latitude position} \item{list("declon")}{Decimal notation of +#' longitude position} \item{list("speed")}{Speed at ping} +#' \item{list("heading")}{Heading at ping} \item{list("date")}{Date and time of +#' ping} } +#' @references niels.hintzen@wur.nl +#' @source niels.hintzen@wur.nl +#' @examples +#' +#' data(VMShf) +#' str(VMShf) +#' +#' +NULL + + + + + +#' development of R scripts to setup the workflow of vms and logbook data +#' +#' -Design of routines to import VMS and Logbook data\cr -Design of routines to +#' cluster metiers\cr -Design of routines to link VMS and Logbook data\cr +#' -Design of routines to define vessel activity\cr -Design of routines to +#' interpolate VMS data\cr -Design of routines to compute pressure +#' indicators\cr +#' +#' \tabular{ll}{ Package: \tab VMStools\cr Type: \tab Package\cr Version: \tab +#' 0.58\cr Date: \tab 2011-12-30\cr License: \tab \cr LazyLoad: \tab yes\cr } +#' +#' @name VMStools-package +#' @aliases VMStools-package VMStools +#' @docType package +#' @author Niels T. 
Hintzen, Doug Beare, Francois Bastardie, Nicolas Deporte, +#' Andy South, Neil Campbell, Fabrizio Manco, Clara Ulrich Rescan, Stephanie +#' Mahevas, Sebastien Demaneche, Josefine Egekvist, Hans Gerritsen, Matthew +#' Parker-Humphreys +#' +#' Maintainer: Niels T. Hintzen , Francois Bastardie +#' +#' @references EU Lot 2 project +NULL + + + diff --git a/vmstools/R/withinVar.r b/vmstools/R/withinVar.r index 3721c78..613b440 100644 --- a/vmstools/R/withinVar.r +++ b/vmstools/R/withinVar.r @@ -1,10 +1,38 @@ -################################################################################ -# Calculate the cluster's within-variance (square distance between each row of # -# the cluster and the cluster's center of gravity) # -# we calculate the distance row by row # -################################################################################ - -withinVar <- function(oneRowOfCluster,centerOfGravityClusti){ - comb <- rbind(centerOfGravityClusti, oneRowOfCluster) - sqrComb <- dist(comb)^2 -return(sqrComb)} \ No newline at end of file +################################################################################ +# Calculate the cluster's within-variance (square distance between each row of # +# the cluster and the cluster's center of gravity) # +# we calculate the distance row by row # +################################################################################ + + + +#' Useful functions for the multivariate analysis of logbooks data for +#' identifying metiers. +#' +#' This function contains several functions needed for the multivariate +#' analysis of logbooks data for identifying metiers. +#' +#' +#' @param transformation_proportion Transform quantities to percentage values +#' (between 0 and 100) of each species in the logevent total catch. 
+#' @param table_variables Transpose the dataset (change variables into +#' individuals) +#' @param scree Implementation of "scree-test" +#' @param select_species Remove the cluster with the smallest mean of capture +#' @param building_tab_pca Build the table with the main species +#' @param test.values Compute the test-value for each species by cluster +#' @param targetspecies Determine the species with a test-value > 1.96 by +#' cluster +#' @param withinVar Calculate the cluster's within-variance +#' @note A number of libraries are initially called for the whole metier +#' analyses and must be installed : +#' (FactoMineR),(cluster),(SOAR),(amap),(MASS),(mda) +#' @author Nicolas Deporte, Sebastien Demaneche, Stephanie Mahevas (IFREMER, +#' France), Clara Ulrich, Francois Bastardie (DTU Aqua, Denmark) +#' @references Development of tools for logbook and VMS data analysis. Studies +#' for carrying out the common fisheries policy No MARE/2008/10 Lot 2 +#' @export withinVar +withinVar <- function(oneRowOfCluster,centerOfGravityClusti){ + comb <- rbind(centerOfGravityClusti, oneRowOfCluster) + sqrComb <- dist(comb)^2 +return(sqrComb)}